Loading packages
#===============================================================================
#BTC.LineZero.Header.1.1.0
#===============================================================================
#R Markdown environment setup and reporting utility.
#===============================================================================
#RLB.Dependencies:
# knitr, magrittr, pacman, rio, rmarkdown, rmdformats, tibble, yaml
#===============================================================================
#Input for document parameters, libraries, file paths, and options.
#=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=
knitr::opts_chunk$set(message=FALSE, warning = FALSE)
path_working <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/"
path_library <- "/Library/Frameworks/R.framework/Resources/library"
str_libraries <- c(
"readxl", "phyloseq", "tidyverse", "pacman", "yaml"
)
path_working <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git"
path_library <- "/Library/Frameworks/R.framework/Resources/library"
str_libraries <- c("readxl", "phyloseq", "tidyverse", "pacman", "yaml", "ggplot2", "vegan", "microbiome", "ggpubr", "viridis", "decontam", "gridExtra", "ggpubr", "lme4", "lmerTest", "writexl", "harrietr", "Maaslin2", "ggtext", "ggpmisc", "gridExtra", "gamm4", "reshape2", "kableExtra", "knitr", "ggtree", "car")
YAML_header <-
'---
title: "Host-DNA depletion 1: data wrangling"
author: "Minsik Kim"
date: "2032.03.10"
output:
rmdformats::downcute:
downcute_theme: "chaos"
code_folding: hide
fig_width: 6
fig_height: 6
---'
seed <- "20230330"
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads libraries, file paths, and other document options.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
FUN.LineZero.Boot <- function() {
.libPaths(path_library)
require(pacman)
pacman::p_load(c("knitr", "rmarkdown", "rmdformats", "yaml"))
knitr::opts_knit$set(root.dir = path_working)
str_libraries |> unique() |> sort() -> str_libraries
pacman::p_load(char = str_libraries)
set.seed(seed)
}
FUN.LineZero.Boot()
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Outputs R environment report.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
FUN.LineZero.Report <- function() {
cat("Line Zero Environment:\n\n")
paste("R:", pacman::p_version(), "\n") |> cat()
cat("Libraries:\n")
for (str_libraries in str_libraries) {
paste(
" ", str_libraries, ": ", pacman::p_version(package = str_libraries),
"\n", sep = ""
) |> cat()
}
paste("\nOperating System:", pacman::p_detectOS(), "\n") |> cat()
paste(" Library Path:", path_library, "\n") |> cat()
paste(" Working Path:", path_working, "\n") |> cat()
paste("Seed:", seed, "\n\n") |> cat()
cat("YAML Header:\n")
cat(YAML_header)
}
FUN.LineZero.Report()
## Line Zero Environment:
##
## R: 4.2.2
## Libraries:
## readxl: 1.4.2
## phyloseq: 1.40.0
## tidyverse: 2.0.0
## pacman: 0.5.1
## yaml: 2.3.7
## ggplot2: 3.4.1
## vegan: 2.6.4
## microbiome: 1.18.0
## ggpubr: 0.6.0
## viridis: 0.6.2
## decontam: 1.16.0
## gridExtra: 2.3
## ggpubr: 0.6.0
## lme4: 1.1.31
## lmerTest: 3.1.3
## writexl: 1.4.2
## harrietr: 0.2.3
## Maaslin2: 1.10.0
## ggtext: 0.1.2
## ggpmisc: 0.5.2
## gridExtra: 2.3
## gamm4: 0.2.6
## reshape2: 1.4.4
## kableExtra: 1.3.4
## knitr: 1.42
## ggtree: 3.4.4
## car: 3.1.1
##
## Operating System: Darwin
## Library Path: /Library/Frameworks/R.framework/Resources/library
## Working Path: /Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git
## Seed: 20230330
##
## YAML Header:
## ---
## title: "Host-DNA depletion 1: data wrangling"
## author: "Minsik Kim"
## date: "2032.03.10"
## output:
## rmdformats::downcute:
## downcute_theme: "chaos"
## code_folding: hide
## fig_width: 6
## fig_height: 6
## ---
Script description
1. Loading data
1.1. phyloseq obejct
1.2. qPCR data (controls)
2. QC
QC1. How many samples failed sequencing
QC2. How were changes in read stats and host DNA proportion?
QC3. How were the extraction controls
QC4. Prevalence / abundance filtering - red flag
3. Analysis
A0. Calculation of alpha-diversity indices
A1. Host DNA, bacterial DNA and % host
A2. Modeling of sequencing results
A3. Taxa alpha diversity
A4. Taxa beta diversity
Intermediate results
A5. DA analysis for taxa
A6. Decontam
A7. LM of function alpha diversity (BPI)
A8. permanova of function alpha diversity
A9. DA for function
Data inputs
Meta data
qPCR - bacteria
qPCR - human
qPCR host %
Raw reads
final reads
sequencing host %
library prep failure status
Raw reads
subject_id
treatment
sample_type
subject_id
Sequencing result
samples
controls
Loading data
# Loading files -----------------------------------------------------------
#loading tidy phyloseq object
phyloseq <- read_rds("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/4_Data/2_Tidy/Phyloseq/PHY_20221129_MGK_host_tidy_tax.rds")
####Need to be removed after adding sequenicing data
data_qPCR <- read_csv("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Tables/DAT_20230122_MGK_host_control_qPCR.csv")
#qPCR data of all the samples sent out sequencing
data_qPCR <- subset(data_qPCR, data_qPCR$baylor_other_id %in% c(sample_names(phyloseq$phyloseq_count)) | data_qPCR$baylor_other_id %in% c(read_excel("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_Baylor/3_Documentation/Communications/2023-01-24_baylor_shipping_sicas2_nasal_host_depleted/CMMR_MetadataCapture_20230124_LaiP-PQ00430_SICAS2_NS.xlsx", skip = 27) %>% data.frame %>% .$`Optional..............secondary.ID`))
data_qPCR <- subset(data_qPCR, data_qPCR$sample_type %in% c("BAL", "nasal_swab", "Sputum", "neg_depletion", "pos_depletion"))
data_qPCR$sample_type <- factor(data_qPCR$sample_type, levels = c("BAL", "nasal_swab", "Sputum", "pos_depletion", "neg_depletion"),
labels = c("BAL", "Nasal swab", "Sputum", "P depletion", "N depletion"))
data_qPCR$treatment <- factor(data_qPCR$treatment, levels = c("control", "lyPMA", "benzonase", "host_zero", "molysis", "qiaamp"),
labels = c("Control", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"))
#sample data loading
sample_data <- sample_data(phyloseq$phyloseq_count)
Q1. How were sequencing results?
Histogram of reads counts and host %
Figure - regular scale
Raw scale is not normally distributed
# Initail QC --------------------------------------------------------------
#Quesetions - QC
#Q0. How many samples failed in sequencing
## figures -----raw data
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
mutate(host_seq_percent = sequencing_host_prop * 100,
.after = sequencing_host_prop,) %>%
gather(feature, value, Raw_reads:host_seq_percent) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent")) %>%
mutate(feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent"), labels = c("Raw reads", "Host mapped", "Final reads", "Host %"))) %>%
ggplot(aes(x = value, fill = treatment)) +
geom_histogram(bins = 97) +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
facet_grid(sample_type~feature, scales = "free") +
ggtitle("log10 transfromed histrogram") +
theme_classic() +
theme(legend.position = "top")
Figure - log10 scale
log transform is adquate for read counts
Host% is not transfromed well
## figures -----log10
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
mutate(host_seq_percent = 100 * sequencing_host_prop,
.after = sequencing_host_prop,) %>%
gather(feature, value, Raw_reads:host_seq_percent) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent")) %>%
mutate(feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent"), labels = c("Raw reads", "Host mapped", "Final reads", "Host %"))) %>%
ggplot(aes(x = log10(value), fill = treatment)) +
geom_histogram(bins = 97) +
facet_grid(sample_type~feature, scales = "free") +
ggtitle("log10 transformed") +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
theme_classic() +
theme(legend.position = "top")
Figure - scaling host proportion
Raw % will be used for host%
## figures -----log10
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
mutate(host_seq_percent = 100 * sequencing_host_prop,
log_seq_percent = log10(host_seq_percent),
sqrt_seq_percent = sqrt(host_seq_percent),
.after = sequencing_host_prop,) %>%
gather(feature, value, Raw_reads:sqrt_seq_percent) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("host_seq_percent", "log_seq_percent", "sqrt_seq_percent")) %>%
mutate(feature = factor(feature, levels = c("host_seq_percent", "log_seq_percent", "sqrt_seq_percent"), labels = c("Host %", "log10 (host %)", "Sqrt(host %)"))) %>%
ggplot(aes(x = value, fill = treatment)) +
geom_histogram(bins = 97) +
facet_grid(sample_type~feature, scales = "free") +
ggtitle("Host % transfromed (raw, log10, and sqrt) histrogram") +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
theme_classic() +
theme(legend.position = "top")
Figure - log10 scale by treatment
ggarrange(ggplot(sample_data %>% subset(., !is.na(.$subject_id)) %>% data.frame(), aes(x = Final_reads, fill = treatment)) +
geom_histogram(bins = 97) +
facet_wrap(~sample_type) +
theme_classic(base_family = "serif") +
ggtitle("Histogram of final reads by sample type and treatment") +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment"),
ggplot(sample_data %>% subset(., !is.na(.$subject_id)) %>% data.frame(), aes(x = log10(Final_reads), fill = treatment)) +
geom_histogram(bins = 97) +
facet_wrap(~sample_type) +
theme_classic(base_family = "serif") +
ggtitle("Histogram of log10(final reads) by sample type and treatment") +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment"),
common.legend = T, ncol = 1)
Histogram (sum of OTU table)
2 samples showed 0 reads in sum(OTU)
hist((log10((phyloseq$phyloseq_count %>% otu_table %>% colSums()) + 1)),100, main = "Histogram of total reads (sum of OTU table)") # 2 samples showed 0 total reads (sum of otu_table)
Final reads of library prep failed samples
Some samples did not pass library prep QC, but showed reasonable final reads
#how were the samples failed in library prep?
sample_data %>% data.frame %>% mutate(total_read = phyloseq$phyloseq_count %>% otu_table %>% colSums()) %>% rownames_to_column(var = "baylor_other_id") %>%
ggplot(aes(x = reorder(baylor_other_id, -total_read),
y = log10(total_read + 1),
col = lib_failed)) +
geom_point() +
theme_classic(base_family = "serif") +
theme(axis.title.y = element_markdown(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 4)) +
ylab("log<sub>10</sub>(Sum of OTU table reads)") +
xlab("Sample ID") +
guides(col=guide_legend(title="Library failed")) +
ggtitle("Sum of OTU reads by library failure status")
List of samples failed in sequencing
2 BAL samples (control and lyPMA group) failed in sequencing
sample_data %>% data.frame %>% filter(phyloseq$phyloseq_count %>% otu_table %>% colSums() == 0) # two BAL sampels showed 0 total reads
#sample_data(phyloseq$phyloseq_count) %>% data.frame() %>% subset(., .$lib_failed)
QC 1 Results:
1.1 Modeling final read should be conducted with log transfrom. Host % need no transformation.
1.2 13 samples failed in library prep
1.3. Two BAL sampels showed 0 total reads
1.4. Sequencing fail ≠ library prep fail
Comments from Baylor:
Q: What was the lab’s criteria for deciding which samples failed library prep.? There were 13 samples that you pointed as failed but their sequencing result actually looks pretty good (ie similar to samples that didn’t fail library prep)
A: To determine whether a library attempt “passed or failed” the lab looks at the picogreen concentrations and a library fragment size distribution trace. The trace files are an output from either the Fragment Analyzer or TapeStation (a copy of the trace files for PQ00331 is attached). If a sample has a background level pico concentration and no discernable fragment concentration on the trace (i.e. a flat trace line) it is considered failed library. If the sample is below the level of detection of our standard library QC methods, it is considered failure. It’s still possible that there is some small amounts of library in those samples that were successfully sequenced, but often those samples do not generate a meaningful amount of sequence data.
QC2 Chagnes of reads and host % by treatment
For detailed analysis, sequencing matrices were analyzed by each sample type and treatment
Reads and host % by treatment
QC table by treated (binary)
Changes in matrices were observed
#sequencing result by sample type and control (1/0)
options(dplyr.summarise.inform = FALSE)
sample_data %>% data.frame() %>%
group_by(sample_type, treated) %>%
summarise(N = n(),
`Raw reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Raw_reads/10000000),2),nsmall = 2, big.mark = ","), " [", format(round(quantile(Raw_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Raw_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Host_mapped/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Host_mapped/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Host_mapped/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads proportion<br>(median [IQR])<br>[%]` = paste(format(round(median(sequencing_host_prop * 100),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(sequencing_host_prop * 100, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(sequencing_host_prop * 100, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Final reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Final_reads/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Final_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Final_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
) %>%
rename(`Sample type` = sample_type, Treated = treated) %>%
data.frame(check.names = F) %>% mutate_all(linebreak) %>% kbl(format = "html", escape = F) %>% kable_styling(full_width = 0, html_font = "serif")
| Sample type | Treated | N |
Raw reads (median [IQR]) [reads x 107] |
Host reads (median [IQR]) [reads x 107] |
Host reads proportion (median [IQR]) [%] |
Final reads (median [IQR]) [reads x 107] |
|---|---|---|---|---|---|---|
| BAL | 0 | 5 | 15.73 [6.35, 15.92] | 12.92 [5.21, 12.94] | 82.14 [82.12, 82.18] | 0.03 [0.03, 0.04] |
| BAL | 1 | 25 | 6.17 [4.57, 17.43] | 4.65 [2.78, 12.80] | 75.59 [69.38, 78.44] | 0.17 [0.10, 0.37] |
| Nasal swab | 0 | 10 | 13.09 [7.73, 16.93] | 10.05 [6.11, 13.04] | 77.34 [76.89, 79.45] | 0.48 [0.10, 0.87] |
| Nasal swab | 1 | 25 | 4.08 [0.99, 6.40] | 0.81 [0.26, 1.36] | 27.46 [13.51, 64.58] | 0.97 [0.17, 3.42] |
| Sputum | 0 | 5 | 8.59 [8.25, 9.27] | 6.87 [6.69, 7.50] | 80.61 [79.92, 80.89] | 0.06 [0.06, 0.09] |
| Sputum | 1 | 25 | 12.23 [10.34, 13.73] | 7.71 [3.76, 8.82] | 58.64 [39.71, 74.93] | 1.16 [0.47, 4.19] |
| Neg. ext. | 0 | 1 | 0.02 [0.02, 0.02] | 0.00 [0.00, 0.00] | 3.13 [3.13, 3.13] | 0.02 [0.02, 0.02] |
| Pos. ext. | 0 | 1 | 11.87 [11.87, 11.87] | 0.00 [0.00, 0.00] | 0.02 [0.02, 0.02] | 9.96 [9.96, 9.96] |
QC table by treatment methods
Changes were sample type * treatment specific
sample_data %>% data.frame() %>%
#dplyr::filter(sample_type %in% c("Sputum", "nasal_swab", "BAL")) %>%
group_by (sample_type, treatment) %>%
summarise(N = n(),
`Raw reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Raw_reads/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Raw_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Raw_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Host_mapped/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Host_mapped/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Host_mapped/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads proportion<br>(median [IQR])<br>[%]` = paste(format(round(median(sequencing_host_prop * 100),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(sequencing_host_prop * 100, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(sequencing_host_prop * 100, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Final reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Final_reads/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Final_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Final_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
) %>% data.frame(check.names = F) %>%
arrange(sample_type, treatment) %>%
rename(`Sample type` = sample_type, Treatment = treatment) %>%
mutate_all(linebreak) %>% kbl(format = "html", escape = F) %>% kable_styling(full_width = 0, html_font = "serif")
| Sample type | Treatment | N |
Raw reads (median [IQR]) [reads x 107] |
Host reads (median [IQR]) [reads x 107] |
Host reads proportion (median [IQR]) [%] |
Final reads (median [IQR]) [reads x 107] |
|---|---|---|---|---|---|---|
| BAL | Control | 5 | 15.73 [6.35, 15.92] | 12.92 [5.21, 12.94] | 82.14 [82.12, 82.18] | 0.03 [0.03, 0.04] |
| BAL | lyPMA | 5 | 5.72 [3.59, 13.41] | 4.65 [2.79, 10.90] | 80.73 [77.85, 81.18] | 0.06 [0.04, 0.10] |
| BAL | Benzonase | 5 | 18.59 [16.20, 23.63] | 14.77 [12.80, 18.16] | 79.02 [77.89, 79.45] | 0.17 [0.16, 0.22] |
| BAL | Host zero | 5 | 4.57 [2.32, 4.71] | 2.69 [1.61, 2.93] | 66.98 [57.17, 68.95] | 0.24 [0.13, 0.82] |
| BAL | Molysis | 5 | 4.76 [3.57, 4.86] | 2.78 [1.39, 3.61] | 74.39 [74.29, 75.59] | 0.29 [0.13, 1.56] |
| BAL | QIAamp | 5 | 17.19 [15.35, 17.43] | 11.87 [10.79, 12.22] | 74.88 [71.08, 77.31] | 0.26 [0.10, 1.02] |
| Nasal swab | Control | 10 | 13.09 [7.73, 16.93] | 10.05 [6.11, 13.04] | 77.34 [76.89, 79.45] | 0.48 [0.10, 0.87] |
| Nasal swab | lyPMA | 5 | 0.98 [0.85, 1.24] | 0.63 [0.28, 0.88] | 71.37 [28.73, 74.68] | 0.07 [0.06, 0.08] |
| Nasal swab | Benzonase | 5 | 5.75 [4.95, 6.57] | 3.66 [1.29, 5.05] | 64.58 [63.71, 76.83] | 0.28 [0.26, 1.04] |
| Nasal swab | Host zero | 5 | 2.83 [1.42, 6.42] | 0.49 [0.03, 0.81] | 7.67 [2.33, 25.73] | 2.43 [0.97, 5.03] |
| Nasal swab | Molysis | 5 | 0.99 [0.63, 4.08] | 0.42 [0.06, 0.64] | 41.04 [4.31, 64.24] | 0.32 [0.17, 2.53] |
| Nasal swab | QIAamp | 5 | 6.40 [6.40, 6.80] | 0.86 [0.86, 1.17] | 17.24 [13.51, 19.57] | 4.63 [4.50, 4.67] |
| Sputum | Control | 5 | 8.59 [8.25, 9.27] | 6.87 [6.69, 7.50] | 80.61 [79.92, 80.89] | 0.06 [0.06, 0.09] |
| Sputum | lyPMA | 5 | 10.98 [5.22, 12.78] | 8.82 [3.76, 10.44] | 76.33 [72.02, 80.29] | 0.25 [0.15, 0.44] |
| Sputum | Benzonase | 5 | 10.76 [10.34, 10.82] | 7.81 [7.75, 8.24] | 74.93 [72.13, 75.33] | 0.47 [0.45, 0.59] |
| Sputum | Host zero | 5 | 13.14 [7.64, 13.95] | 4.39 [3.80, 7.71] | 49.71 [31.45, 55.49] | 2.91 [2.36, 3.67] |
| Sputum | Molysis | 5 | 12.59 [10.84, 13.73] | 2.98 [1.83, 4.28] | 27.49 [14.62, 28.19] | 6.11 [5.56, 8.37] |
| Sputum | QIAamp | 5 | 12.35 [12.23, 12.85] | 9.08 [8.41, 9.27] | 71.76 [56.69, 73.50] | 1.16 [1.13, 3.89] |
| Neg. ext. | Control | 1 | 0.02 [0.02, 0.02] | 0.00 [0.00, 0.00] | 3.13 [3.13, 3.13] | 0.02 [0.02, 0.02] |
| Pos. ext. | Control | 1 | 11.87 [11.87, 11.87] | 0.00 [0.00, 0.00] | 0.02 [0.02, 0.02] | 9.96 [9.96, 9.96] |
Figure of reads by treatment (z-score)
Changes were sample type * treatment specific
# Summary figures - facet and z-score -------------------------------------
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
gather(feature, value, Raw_reads:sequencing_host_prop) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop")) %>%
mutate(z_score = scale(value),
feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop"), labels = c("Raw reads", "Host mapped", "Final reads", "Host %"))) %>%
ggplot(aes(x = treatment, y = z_score, fill = treatment)) +
geom_boxplot(lwd = 0.2) +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
facet_grid(sample_type~feature) +
xlab("Treatment") +
ylab("Z score") +
theme_classic(base_family = "serif", base_size = 14) +
guides( x = guide_axis(angle = 90)) +
theme(legend.position = "top") +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment") #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
Results:
2.1. There were no differences in raw reads.
2.2. However, final reads increased after some treatment, and host DNA proportion decreased
QC3. Positive and negative controls
Positive and negative controls were compared with mock community
Reads and host % by treatment
Species richness of controls
Some possible contaminants were identified in extraction controls
#Loading theoretical mock community
zymo_mock <- read_excel("/Users/minsikkim/Dropbox (Partners HealthCare)/@minsik/project_sicas2/data_raw/DAR_20210929_zymo_mock_data.xlsx") %>%
data.frame(row.names = T) %>% rename(mock_theoretical = Mock) %>% mutate(mock_theoretical = mock_theoretical/100) %>%
merge_phyloseq(otu_table(., taxa_are_rows = T), tax_table(phyloseq$phyloseq_count))
phyloseq_control_rel <- rbind(c("mock_theoretical", "mock")) %>% data.frame() %>%
column_to_rownames(var = "X1") %>% rename(sample_type = X2) %>% #making sample_data of mock community
merge_phyloseq(sample_data(.), zymo_mock) %>%
merge_phyloseq(., subset_samples(phyloseq$phyloseq_rel, sample_type == "Pos. ext." | sample_type == "Neg. ext." )) #adding data of controls
#Species richness of each control groups
rowSums(t(otu_table(phyloseq_control_rel)) != 0) %>% kbl(format = "html", caption = "Species richness of controls") %>%
kable_styling(full_width = 0, html_font = "serif")
| x | |
|---|---|
| mock_theoretical | 10 |
| 20220606_Pos | 41 |
| 20220606_Neg | 3 |
Bar plot of controls
Some possible contaminants were identified in extraction controls
#Making label for the controls
sample_data(phyloseq_control_rel)$sample_type <- factor(sample_data(phyloseq_control_rel)$sample_type,
levels = c("mock", "2", "1"),
labels = c("Mock, 10 spp.", "Pos., 41 spp.", "Neg., 3 spp."))
#Manipulating phyloseq - only top 10
tax_table(phyloseq_control_rel) %>%
cbind(species10 = "[Others]") %>%
{top10species <- head(taxa_sums(phyloseq_control_rel), 10) %>% names()
.[top10species, "species10"] <- as.character(.[top10species, "Species"])
.[, 8] <- .[, 8] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
phyloseq_temp <- phyloseq_control_rel
tax_table(phyloseq_temp) <- tax_table(.)
phyloseq_temp
} %>%
plot_bar(., fill="species10") +
ylab("Relative abundancne") +
theme_classic(base_size = 11, base_family = "serif") +
ggtitle("Species richness of control data") +
theme(legend.text = element_markdown()) +
guides(fill=guide_legend(title="Top 10 species")) +
facet_wrap (~ sample_type, scales= "free_x", nrow=1)
#there could be opportunistic pathogens...
Results
2.3.1. Negative control showed minimal number of possible contaminants
2.3.2. Positive control contained various contaminants
QC4. Prevalence and abundacne filtering - red flag
Taxa prevance and abundance were checked.
Taxa abundance and prevalence
Histogram of prelanence taxa
No prevalence or abundance filtering (each experimental group is 5% of total sample)
#Calculation of sample prevalence, standard deviation, median abundance across all samples for all bugs and making into a table.
#
#• In initial analysis we will not perform prevalence or abundance filtering (though we may consider this for secondary differential abundance analyses to manage p (features) > n (sample size) problem and issues with multiple hypothesis correction)
taxa_qc <- data.frame("species" = otu_table(phyloseq$phyloseq_rel) %>% t() %>% colnames(),
"prevalence" = ifelse(phyloseq$phyloseq_count %>% otu_table() > 0, 1, 0) %>% t() %>% colSums(), #Prevalence of taxa
"mean_rel_abd" = phyloseq$phyloseq_rel %>% otu_table() %>% t() %>% colMeans(na.rm = T) #mean relativ abundacne
)
hist(log10(taxa_qc$prevalence), xlab = "log10(Taxa prevalence)", main = "Histogram of prevalence of taxa")
Histogram of mean abundance
hist(log10(taxa_qc$mean_rel_abd), xlab = "log10(Mean relative abundance)", main = "Histogram of mean relative abundance")
Red flag taxa
Taxa with low prevalences were red-flagged
red_flag_taxa <- data.frame(species = taxa_qc$species,
red_flag_prev_abd = ifelse(taxa_qc$prevalence < otu_table(phyloseq$phyloseq_rel) %>% t %>% rownames() %>% length * 0.05 & taxa_qc$mean_rel_abd < quantile(taxa_qc$mean_rel_abd, 0.75), 1, 0))
red_flag_taxa
QC 3 results:
3.1. In initial analysis we will not perform prevalence or abundance filtering (though we may consider this for secondary differentialabundance analyses to manage p (features) > n (sample size) problem and issues with multiple hypothesis correction)
3.2. Red flags were made for taxa not satisfying the criteria (prev < 0.05 & mean rel < 0.75Q)
3.3. Although we don’t consider the prevalence of abundance at this time, we can consider their red-flags after running the DA analysis
Analysis
Before anlayzing, alpha diversity indices were calculated for all phyloseq objects
alpha_diversity <- function(data) {
otu_table <- otu_table(data) %>% .[colSums(.) !=0]
S.obs <- rowSums(t(otu_table) != 0)
sample_data <- sample_data(data)
data_evenness <- vegan::diversity(t(otu_table)) / log(vegan::specnumber(t(otu_table))) # calculate evenness index using vegan package
data_shannon <- vegan::diversity(t(otu_table), index = "shannon") # calculate Shannon index using vegan package
data_hill <- exp(data_shannon) # calculate Hills index
data_dominance <- microbiome::dominance(otu_table, index = "all", rank = 1, aggregate = TRUE) # dominance (Berger-Parker index), etc.
data_invsimpson <- vegan::diversity(t(otu_table), index = "invsimpson") # calculate Shannon index using vegan package
alpha_diversity <- cbind(S.obs, data_shannon, data_hill, data_invsimpson, data_evenness,data_dominance) # combine all indices in one data table
sample_data <- merge(data.frame(sample_data), alpha_diversity, by = 0, all = T) %>% column_to_rownames(var = "Row.names")
}
sample_data(phyloseq$phyloseq_rel) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
sample_data(phyloseq$phyloseq_count) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
sample_data(phyloseq$phyloseq_path_rpkm) <- sample_data(alpha_diversity(phyloseq$phyloseq_path_rpkm))
A1. Host DNA, bacterial DNA by smaple type and treatment
qPCR and sequencing results
qPCR result
#2A: Change in total DNA (qPCR)
f2a <- ggplot(data_qPCR, aes(x = sample_type, y = log10(DNA_host_nondil + DNA_bac_nondil))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("log<sub>10</sub>(qPCR total DNA)<br>(ng/μL)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "A") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#2B: Change in human DNA (qPCR)
f2b <- ggplot(data_qPCR, aes(x = sample_type, y = log10(DNA_host_nondil))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
ylab("log<sub>10</sub>(qPCR host DNA)<br>(ng/μL)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif")+
labs(tag = "B") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#2C: Change in 16S DNA (qPCR)
f2c <- ggplot(data_qPCR, aes(x = sample_type, y = log10(DNA_bac_nondil))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
ylab("log<sub>10</sub>(qPCR bacterial DNA)<br>(ng/μL)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif")+
labs(tag = "C") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#2D. Change in % host (qPCR)
f2d <- ggplot(data_qPCR, aes(x = sample_type, y = log10(host_proportion * 100))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
ylab("log<sub>10</sub>(host DNA) (%)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "D") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#output for markdown
ggarrange(f2a, f2b, f2c, f2d, common.legend = T , align = "hv")
Figure 2. qPCR result of host depletion study. A. Total DNA B. Host DNA C. Bacterial DNA D. Host %
Sequencing result
f3a <- ggplot(subset(sample_data, sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(x = sample_type, y = Raw_reads)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
scale_x_discrete(name ="Sample type")+
ylab("Raw reads") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "A") +
theme(plot.tag = element_text(size = 15)) + # Plot title size
guides(fill = guide_legend(nrow = 1))
# - Host_mapped
f3b <- ggplot(subset(sample_data, sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(x = sample_type, y = Host_mapped)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
scale_x_discrete(name ="Sample type")+
ylab("Host mapped reaeds") +
theme_classic (base_size = 12, base_family = "serif")+
labs(tag = "B") +
theme(plot.tag = element_text(size = 15)) + # Plot title size
guides(fill = guide_legend(nrow = 1))
# - % Host (we have used Host_mapped/Raw_reads in prior papers)
f3d <- ggplot(subset(sample_data, sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(x = sample_type, y = Host_mapped/Raw_reads)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
scale_x_discrete(name ="Sample type")+
ylab("Sequencing host DNA (%)") +
theme_classic (base_size = 12, base_family = "serif")+
labs(tag = "D") +
theme(plot.tag = element_text(size = 15)) + # Plot title size
guides(fill = guide_legend(nrow = 1))
# - Final_reads
f3c <- ggplot(subset(sample_data, sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(x = sample_type, y = Final_reads)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
scale_x_discrete(name ="Sample type")+
ylab("Final reads") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "C") +
theme(plot.tag = element_text(size = 15)) + # Plot title size
guides(fill = guide_legend(nrow = 1))
ggarrange(f3a, f3b, f3c, f3d, common.legend = T, align = "hv")
Figure 3. Sequencing result of host depletion study. A. Total DNA B. Host DNA C. Bacterial DNA D. Host %
Results A1
1.1. Some changed were observed, for both host DNA and bacterial DNA.
1.2. Sequencing results need to be added
This will be Fig 2. of the manuscript, after removing positives and negatives
A2. Modeling on sequencing results
As some changed were observed after treatment, linear mixed effect models were employed for testing.
Test results
Library failure - ANOVA
Some samples failed in library prep. What type of sample were fragile to treatments?
glm ( library fail ~ sample_type + treatment + sample_type * treatment + subject_id )
glmer(lib_failed ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
Anova %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>Chisq)`) < 0.05 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>% column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Chisq | Df | Pr(>Chisq) | ||
|---|---|---|---|---|
| sample_type | 14.08825 | 2 | 0.0008725 |
|
| treatment | 36.68858 | 5 | 0.0000007 |
|
| sample_type * treatment | 34.06531 | 10 | 0.0001801 |
|
Library failure
glm ( sequencing fail ~ sample_type + treatment + sample_type * treatment + subject_id )
Nasal swabs were fragile to lyPMA and Molysis
glmer(lib_failed ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`t value`) > 2 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("sample_type|treatment", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | t value | ||
|---|---|---|---|---|
| (Intercept) | 0.0000000 | 0.1167011 | 0.0000000 | |
| Nasal swab | -0.0204626 | 0.1470919 | -0.1391145 | |
| Sputum | 0.0000000 | 0.1650403 | 0.0000000 | |
| lyPMA | 0.2000000 | 0.1619675 | 1.2348160 | |
| Benzonase | 0.0000000 | 0.1619675 | 0.0000000 | |
| Host zero | 0.2000000 | 0.1619675 | 1.2348160 | |
| Molysis | 0.2000000 | 0.1619675 | 1.2348160 | |
| QIAamp | 0.0000000 | 0.1619675 | 0.0000000 | |
| Nasal swab * lyPMA | 0.5978181 | 0.2143680 | 2.7887474 |
|
| Sputum * lyPMA | -0.2000000 | 0.2290566 | -0.8731468 | |
| Nasal swab * Benzonase | -0.0021819 | 0.2143680 | -0.0101781 | |
| Sputum * Benzonase | 0.0000000 | 0.2290566 | 0.0000000 | |
| Nasal swab * Host zero | 0.1978181 | 0.2143680 | 0.9227971 | |
| Sputum * Host zero | -0.2000000 | 0.2290566 | -0.8731468 | |
| Nasal swab * Molysis | 0.6021819 | 0.2143680 | 2.8091035 |
|
| Sputum * Molysis | -0.2000000 | 0.2290566 | -0.8731468 | |
| Nasal swab * QIAamp | 0.0021819 | 0.2143680 | 0.0101781 | |
| Sputum * QIAamp | 0.0000000 | 0.2290566 | 0.0000000 |
Sequencing failure
Modeling of sequencing failure were not available due to low number of cases.
BAL079 - control & lyPMA failed sequencing.
sample_data(phyloseq$phyloseq_count) %>% data.frame %>% mutate(sequencing_fail = (S.obs == 0)) %>%
glmer(sequencing_fail ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = .) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`t value`) > 2 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("sample_type|treatment", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | t value | ||
|---|---|---|---|---|
| (Intercept) | 0.2 | 0.0667184 | 2.997673 |
|
| Nasal swab | -0.2 | 0.0929713 | -2.151203 |
|
| Sputum | -0.2 | 0.0943541 | -2.119675 |
|
| lyPMA | 0.0 | 0.0797156 | 0.000000 | |
| Benzonase | -0.2 | 0.0797156 | -2.508919 |
|
| Host zero | -0.2 | 0.0797156 | -2.508919 |
|
| Molysis | -0.2 | 0.0797156 | -2.508919 |
|
| QIAamp | -0.2 | 0.0797156 | -2.508919 |
|
| Nasal swab * lyPMA | 0.0 | 0.1057299 | 0.000000 | |
| Sputum * lyPMA | 0.0 | 0.1127349 | 0.000000 | |
| Nasal swab * Benzonase | 0.2 | 0.1057299 | 1.891613 | |
| Sputum * Benzonase | 0.2 | 0.1127349 | 1.774074 | |
| Nasal swab * Host zero | 0.2 | 0.1057299 | 1.891613 | |
| Sputum * Host zero | 0.2 | 0.1127349 | 1.774074 | |
| Nasal swab * Molysis | 0.2 | 0.1057299 | 1.891613 | |
| Sputum * Molysis | 0.2 | 0.1127349 | 1.774074 | |
| Nasal swab * QIAamp | 0.2 | 0.1057299 | 1.891613 | |
| Sputum * QIAamp | 0.2 | 0.1127349 | 1.774074 |
log10(Final reads) - ANOVA
Which methods was effective in increasing the final reads?
Interaction term was significant
lmer(log10(Final_reads) ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
anova %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>% column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 4.548499 | 2.2742495 | 2 | 9.018238 | 12.175621 | 0.0027403 |
|
| treatment | 22.419714 | 4.4839428 | 5 | 68.204420 | 24.005629 | 0.0000000 |
|
| sample_type * treatment | 6.545334 | 0.6545334 | 10 | 68.184280 | 3.504167 | 0.0009047 |
|
log10(Final reads)
Which methods was effective in increasing the final reads?
lmer( log10(Final reads) vs sample_type + treatment + sample_type * treatment + subject_id )
Except lyPMA, every methods increased final reads
lmer(log10(Final_reads) ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("sample_type|treatment", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 5.4933063 | 0.2095210 | 62.79820 | 26.2184086 | 0.0000000 |
|
| Nasal swab | 1.1246781 | 0.2789651 | 37.26869 | 4.0316080 | 0.0002633 |
|
| Sputum | 0.3332900 | 0.2963074 | 62.79820 | 1.1248117 | 0.2649498 | |
| lyPMA | 0.3521385 | 0.2733402 | 67.65016 | 1.2882794 | 0.2020376 | |
| Benzonase | 0.8109049 | 0.2733402 | 67.65016 | 2.9666509 | 0.0041580 |
|
| Host zero | 0.9501904 | 0.2733402 | 67.65016 | 3.4762193 | 0.0008929 |
|
| Molysis | 1.0358039 | 0.2733402 | 67.65016 | 3.7894313 | 0.0003238 |
|
| QIAamp | 1.0480985 | 0.2733402 | 67.65016 | 3.8344107 | 0.0002788 |
|
| Nasal swab * lyPMA | -0.8774152 | 0.3621899 | 68.18953 | -2.4225278 | 0.0180749 |
|
| Sputum * lyPMA | 0.1879364 | 0.3865614 | 67.65016 | 0.4861747 | 0.6284145 | |
| Nasal swab * Benzonase | -0.6887274 | 0.3621899 | 68.18953 | -1.9015642 | 0.0614539 | |
| Sputum * Benzonase | 0.0349655 | 0.3865614 | 67.65016 | 0.0904526 | 0.9281949 | |
| Nasal swab * Host zero | -0.1157121 | 0.3621899 | 68.18953 | -0.3194791 | 0.7503399 | |
| Sputum * Host zero | 0.7231403 | 0.3865614 | 67.65016 | 1.8706997 | 0.0657128 | |
| Nasal swab * Molysis | -0.8411412 | 0.3621899 | 68.18953 | -2.3223760 | 0.0232052 |
|
| Sputum * Molysis | 0.9543008 | 0.3865614 | 67.65016 | 2.4686913 | 0.0160928 |
|
| Nasal swab * QIAamp | 0.0236242 | 0.3621899 | 68.18953 | 0.0652260 | 0.9481849 | |
| Sputum * QIAamp | 0.3676901 | 0.3865614 | 67.65016 | 0.9511817 | 0.3448982 |
Host % ANOVA
Which methods was effective in lowering host %
Interaction term was significant
lmer(sequencing_host_prop ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
anova %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>% column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 1.1399050 | 0.5699525 | 2 | 3.486053 | 27.593506 | 0.0072923 |
|
| treatment | 2.1022767 | 0.4204553 | 5 | 68.821547 | 20.355796 | 0.0000000 |
|
| sample_type * treatment | 0.9200108 | 0.0920011 | 10 | 68.804812 | 4.454112 | 0.0000768 |
|
Host %
Which methods was effective in lowering host %
lmer( Host DNA % vs sample_type + treatment + sample_type * treatment + (1|subject_id) )
Host zero was effect to to all types. Molysis was effective to nasal swab and sputum. QIAamp was effective for nasal swab only.
lmer(sequencing_host_prop ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("sample_type|treatment", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.8200713 | 0.0646272 | 75.67093 | 12.6892671 | 0.0000000 |
|
| Nasal swab | -0.0378253 | 0.0799149 | 41.00023 | -0.4733196 | 0.6384957 | |
| Sputum | -0.0293956 | 0.0913966 | 75.67093 | -0.3216265 | 0.7486224 | |
| lyPMA | -0.0377605 | 0.0908962 | 68.23467 | -0.4154243 | 0.6791348 | |
| Benzonase | -0.0324363 | 0.0908962 | 68.23467 | -0.3568498 | 0.7223055 | |
| Host zero | -0.2000025 | 0.0908962 | 68.23467 | -2.2003393 | 0.0311715 |
|
| Molysis | -0.1577230 | 0.0908962 | 68.23467 | -1.7351990 | 0.0872202 | |
| QIAamp | -0.0928027 | 0.0908962 | 68.23467 | -1.0209737 | 0.3108732 | |
| Nasal swab * lyPMA | -0.1931415 | 0.1202628 | 68.80454 | -1.6059959 | 0.1128550 | |
| Sputum * lyPMA | 0.0106590 | 0.1285467 | 68.23467 | 0.0829196 | 0.9341582 | |
| Nasal swab * Benzonase | -0.1301969 | 0.1202628 | 68.80454 | -1.0826036 | 0.2827639 | |
| Sputum * Benzonase | -0.0432386 | 0.1285467 | 68.23467 | -0.3363648 | 0.7376281 | |
| Nasal swab * Host zero | -0.3924910 | 0.1202628 | 68.80454 | -3.2636117 | 0.0017148 |
|
| Sputum * Host zero | -0.1532698 | 0.1285467 | 68.23467 | -1.1923278 | 0.2372628 | |
| Nasal swab * Molysis | -0.2637315 | 0.1202628 | 68.80454 | -2.1929608 | 0.0316937 |
|
| Sputum * Molysis | -0.3863225 | 0.1285467 | 68.23467 | -3.0053090 | 0.0037094 |
|
| Nasal swab * QIAamp | -0.5240678 | 0.1202628 | 68.80454 | -4.3576894 | 0.0000449 |
|
| Sputum * QIAamp | -0.0370057 | 0.1285467 | 68.23467 | -0.2878775 | 0.7743131 |
Results
1. Library failure was associated with nasal swab, especially after lyPMA and Molysis treatment
2. Benzonase, host-zero, Molysis, and QIAamp increased final reads
3. Host-zero lowered host %. For otheres, there were significant sample_type specific treatment efficiencies
A3. LM of taxa alpha diversity
Alpha diversity could be having changes due to treatment.
Both stratified and nonstratified analyses were conducted.
Figure - Alpha diversity
sample_data <- sample_data(phyloseq$phyloseq_count)
f4a <- ggplot(subset(sample_data(phyloseq$phyloseq_count), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = S.obs)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Species richness") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "A") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
f4b <- ggplot(subset(sample_data(phyloseq$phyloseq_count), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = data_shannon)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Shannon") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "B") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
f4c <- ggplot(subset(sample_data(phyloseq$phyloseq_count), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = data_invsimpson)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Inverse simpson") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "C") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
f4d <- ggplot(subset(sample_data(phyloseq$phyloseq_count), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = dbp)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Berger-Parker index") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "D") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
ggarrange(f4a, f4b, f4c, f4d, common.legend = T, align = "hv") # alpha diversity plots
Species richness
All samples:
S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
Species richness (all samples & interaction term) - ANOVA
Interaction term was significant
sample_data <- sample_data(phyloseq$phyloseq_count) %>% data.frame(check.names = F) %>% subset(., !is.nan(.$simpson))
lmer_sob <- lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_sob %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 7547.487 | 3773.744 | 2 | 10.79325 | 30.28214 | 0.0000374 |
|
| treatment | 7333.548 | 1466.710 | 5 | 64.92720 | 11.76951 | 0.0000000 |
|
| log10(Final_reads) | 2018.806 | 2018.806 | 1 | 67.56866 | 16.19977 | 0.0001465 |
|
| sample_type:treatment | 19463.441 | 1946.344 | 10 | 64.59903 | 15.61830 | 0.0000000 |
|
Species richness (all samples & interaction term)
Incrase at sputum was at every treatment
lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -62.286176 | 18.946348 | 73.91996 | -3.2875031 | 0.0015486 |
|
| Nasal swab | -9.894010 | 13.143390 | 19.76996 | -0.7527746 | 0.4604558 | |
| Sputum | 5.066489 | 11.833705 | 23.07241 | 0.4281405 | 0.6725190 | |
| lyPMA | -3.032607 | 7.992068 | 64.29757 | -0.3794521 | 0.7056025 | |
| Benzonase | -4.719045 | 8.051585 | 65.30664 | -0.5861014 | 0.5598277 | |
| Host zero | -2.064590 | 8.208256 | 65.44300 | -0.2515260 | 0.8021956 | |
| Molysis | 10.862491 | 8.314322 | 65.52729 | 1.3064795 | 0.1959566 | |
| QIAamp | -2.491588 | 8.330143 | 65.53938 | -0.2991050 | 0.7658063 | |
| log10(Final_reads) | 12.532134 | 3.113656 | 67.56866 | 4.0248934 | 0.0001465 |
|
| Nasal swab * lyPMA | 4.202591 | 10.419319 | 64.56848 | 0.4033461 | 0.6880263 | |
| Sputum * lyPMA | 34.664316 | 10.599221 | 64.21630 | 3.2704588 | 0.0017287 |
|
| Nasal swab * Benzonase | 1.775047 | 10.042806 | 64.92798 | 0.1767481 | 0.8602564 | |
| Sputum * Benzonase | 62.518484 | 10.357810 | 64.45145 | 6.0358784 | 0.0000001 |
|
| Nasal swab * Host zero | 4.793943 | 9.787379 | 64.68743 | 0.4898086 | 0.6259264 | |
| Sputum * Host zero | 92.094185 | 10.559614 | 64.43191 | 8.7213589 | 0.0000000 |
|
| Nasal swab * Molysis | -7.289176 | 10.178107 | 65.15977 | -0.7161623 | 0.4764500 | |
| Sputum * Molysis | 87.197251 | 10.723044 | 64.48528 | 8.1317631 | 0.0000000 |
|
| Nasal swab * QIAamp | -3.726532 | 9.774308 | 64.66718 | -0.3812579 | 0.7042616 | |
| Sputum * QIAamp | 70.548734 | 10.400892 | 64.40670 | 6.7829501 | 0.0000000 |
|
Species richness - stratified (NS)
Molysis and host zero may incrased speciess richness of nasal swab
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -26.0233860 | 9.340261 | 28 | -2.7861520 | 0.0094654 |
|
| lyPMA | -2.1153025 | 2.324810 | 28 | -0.9098818 | 0.3706512 | |
| Benzonase | -1.7637014 | 2.206148 | 28 | -0.7994486 | 0.4307605 | |
| Host zero | 8.8224892 | 2.495655 | 28 | 3.5351399 | 0.0014386 |
|
| Molysis | 4.5783098 | 2.217883 | 28 | 2.0642703 | 0.0483678 |
|
| QIAamp | 0.8360832 | 2.678401 | 28 | 0.3121575 | 0.7572336 | |
| log10(Final_reads) | 5.6349921 | 1.419898 | 28 | 3.9685903 | 0.0004571 |
|
Species richness at BAL
No changes observed
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -100.395549 | 25.504894 | 20.38095 | -3.9363249 | 0.0007925 |
|
| lyPMA | -5.839094 | 7.201885 | 17.39942 | -0.8107731 | 0.4284473 | |
| Benzonase | -10.667780 | 7.752673 | 18.42945 | -1.3760131 | 0.1853100 | |
| Host zero | -8.986746 | 8.091470 | 18.61512 | -1.1106444 | 0.2808621 | |
| Molysis | 3.342010 | 8.316699 | 18.72132 | 0.4018433 | 0.6923498 | |
| QIAamp | -10.097992 | 8.350027 | 18.73603 | -1.2093364 | 0.2415732 | |
| log10(Final_reads) | 19.520813 | 4.535759 | 19.97726 | 4.3037587 | 0.0003466 |
|
Species richness at sputum
Benzonase may incrased speciess richness of sputum
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -166.73637 | 94.19800 | 20.78908 | -1.770063 | 0.0913826 | |
| lyPMA | 21.48046 | 13.48708 | 19.68002 | 1.592670 | 0.1271695 | |
| Benzonase | 41.90046 | 17.06102 | 19.99448 | 2.455918 | 0.0233254 |
|
| Host zero | 58.57768 | 28.77613 | 20.32431 | 2.035634 | 0.0550337 | |
| Molysis | 60.65374 | 33.57099 | 20.37080 | 1.806731 | 0.0855997 | |
| QIAamp | 41.44599 | 24.96239 | 20.26633 | 1.660337 | 0.1122410 | |
| log10(Final_reads) | 31.32813 | 16.05007 | 20.49824 | 1.951899 | 0.0647464 |
Shannon
All samples:
Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
ANOVA Shannon
Similar pattern with species richness
lmer_shannon <- lmer(data_shannon ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_shannon %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 3.3128614 | 1.6564307 | 2 | 10.63403 | 7.318283 | 0.0100286 |
|
| treatment | 6.1394953 | 1.2278991 | 5 | 64.78018 | 5.424986 | 0.0003143 |
|
| log10(Final_reads) | 0.9864384 | 0.9864384 | 1 | 67.39196 | 4.358187 | 0.0406149 |
|
| sample_type * treatment | 5.5684422 | 0.5568442 | 10 | 64.45592 | 2.460196 | 0.0147100 |
|
Shannon (all samples & interaction term)
Similar pattern with species richness
#Shannon
lmer_shannon %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.2922817 | 0.8090108 | 73.88309 | 2.8334375 | 0.0059333 |
|
| Nasal swab | 0.4603976 | 0.5653783 | 19.28942 | 0.8143179 | 0.4254023 | |
| Sputum | 0.5529713 | 0.5086504 | 22.46019 | 1.0871343 | 0.2885070 | |
| lyPMA | 0.1726922 | 0.3406059 | 64.15772 | 0.5070146 | 0.6138828 | |
| Benzonase | 0.3896151 | 0.3431787 | 65.15496 | 1.1353129 | 0.2604034 | |
| Host zero | 0.2374929 | 0.3498614 | 65.28972 | 0.6788198 | 0.4996513 | |
| Molysis | 1.0035787 | 0.3543855 | 65.37303 | 2.8318841 | 0.0061444 |
|
| QIAamp | 0.4240802 | 0.3550603 | 65.38498 | 1.1943892 | 0.2366407 | |
| log10(Final_reads) | -0.2771226 | 0.1327452 | 67.39196 | -2.0876272 | 0.0406149 |
|
| Nasal swab * lyPMA | -0.4306354 | 0.4440629 | 64.42526 | -0.9697620 | 0.3357919 | |
| Sputum * lyPMA | 0.9407250 | 0.4517137 | 64.07751 | 2.0825690 | 0.0412874 |
|
| Nasal swab * Benzonase | -0.1913205 | 0.4280323 | 64.78076 | -0.4469766 | 0.6563828 | |
| Sputum * Benzonase | 1.1910341 | 0.4414361 | 64.30980 | 2.6980895 | 0.0088969 |
|
| Nasal swab * Host zero | 0.2059598 | 0.4171353 | 64.54324 | 0.4937482 | 0.6231601 | |
| Sputum * Host zero | 1.5319202 | 0.4500358 | 64.29022 | 3.4039963 | 0.0011480 |
|
| Nasal swab * Molysis | -0.6557050 | 0.4338096 | 65.01040 | -1.5115042 | 0.1355060 | |
| Sputum * Molysis | 0.9831440 | 0.4570035 | 64.34280 | 2.1512836 | 0.0352141 |
|
| Nasal swab * QIAamp | -0.2868142 | 0.4165773 | 64.52336 | -0.6885016 | 0.4936051 | |
| Sputum * QIAamp | 1.0954341 | 0.4432702 | 64.26548 | 2.4712562 | 0.0161247 |
|
Shannon - stratified (NS)
Similar pattern with species richness
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.4705305 | 0.7141282 | 28 | 4.859814 | 0.0000407 |
|
| lyPMA | -0.3473353 | 0.1777480 | 28 | -1.954088 | 0.0607452 | |
| Benzonase | 0.1820609 | 0.1686754 | 28 | 1.079356 | 0.2896393 | |
| Host zero | 0.5077044 | 0.1908103 | 28 | 2.660782 | 0.0127593 |
|
| Molysis | 0.3999084 | 0.1695727 | 28 | 2.358331 | 0.0255726 |
|
| QIAamp | 0.2884031 | 0.2047825 | 28 | 1.408339 | 0.1700383 | |
| log10(Final_reads) | -0.3901164 | 0.1085611 | 28 | -3.593519 | 0.0012351 |
|
Shannon - stratified (BAL)
Similar pattern with species richness
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.4183961 | 1.4942036 | 20.85902 | 0.2800128 | 0.7822258 | |
| lyPMA | 0.0342340 | 0.4350346 | 17.07472 | 0.0786927 | 0.9381919 | |
| Benzonase | 0.0898923 | 0.4638505 | 18.70718 | 0.1937959 | 0.8484245 | |
| Host zero | -0.1102537 | 0.4832242 | 18.99852 | -0.2281627 | 0.8219575 | |
| Molysis | 0.6263137 | 0.4961366 | 19.16322 | 1.2623816 | 0.2219587 | |
| QIAamp | 0.0425762 | 0.4980493 | 19.18590 | 0.0854859 | 0.9327609 | |
| log10(Final_reads) | 0.0676641 | 0.2665336 | 20.80923 | 0.2538672 | 0.8020897 |
Shannon - stratified (Spt)
Similar pattern with species richness
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.1874597 | 3.2210139 | 20.47081 | 0.9895827 | 0.3339316 | |
| lyPMA | 1.1451368 | 0.4583906 | 19.43439 | 2.4981682 | 0.0215971 |
|
| Benzonase | 1.6303288 | 0.5807036 | 19.65341 | 2.8075057 | 0.0109962 |
|
| Host zero | 1.8676909 | 0.9809856 | 19.88476 | 1.9038922 | 0.0714978 | |
| Molysis | 2.1036052 | 1.1447010 | 19.91755 | 1.8376897 | 0.0810732 | |
| QIAamp | 1.6026662 | 0.8507368 | 19.84393 | 1.8838567 | 0.0743222 | |
| log10(Final_reads) | -0.3358544 | 0.5476150 | 20.00774 | -0.6133039 | 0.5465850 |
Simpson
Inverse Simpson of all samples:
Inverse Simpson ~ sample_type * treatment + (1|original_sample)
Stratified:
Inverse Simpson ~ treatment + (1|original_sample)
Inv Simp - ANOVA
lmer_invsimpson <- lmer(data_invsimpson ~ sample_type * treatment + (1|subject_id), data = sample_data)
lmer_invsimpson %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 66.71888 | 33.35944 | 2 | 10.09954 | 6.331375 | 0.0165144 |
|
| treatment | 108.79840 | 21.75968 | 5 | 65.57511 | 4.129826 | 0.0025391 |
|
| sample_type * treatment | 146.81964 | 14.68196 | 10 | 65.54979 | 2.786528 | 0.0062113 |
|
Simpson (all samples & interaction term)
#Simpson
lmer_invsimpson %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.7636785 | 1.590206 | 31.25786 | 1.7379371 | 0.0920661 | |
| Nasal swab | -0.7517231 | 2.274375 | 19.94045 | -0.3305186 | 0.7444557 | |
| Sputum | 0.1251783 | 2.177249 | 28.40089 | 0.0574938 | 0.9545544 | |
| lyPMA | -0.2135194 | 1.623100 | 65.17797 | -0.1315504 | 0.8957452 | |
| Benzonase | -0.4293400 | 1.557133 | 65.78621 | -0.2757247 | 0.7836238 | |
| Host zero | -1.0688451 | 1.557133 | 65.78621 | -0.6864187 | 0.4948615 | |
| Molysis | 2.3309395 | 1.557133 | 65.78621 | 1.4969432 | 0.1391907 | |
| QIAamp | -0.7961192 | 1.557133 | 65.78621 | -0.5112725 | 0.6108720 | |
| Nasal swab * lyPMA | 0.0633029 | 2.059718 | 65.40566 | 0.0307338 | 0.9755754 | |
| Sputum * lyPMA | 3.6202491 | 2.177618 | 65.17797 | 1.6624816 | 0.1012185 | |
| Nasal swab * Benzonase | 0.9659709 | 2.008145 | 65.78191 | 0.4810265 | 0.6320938 | |
| Sputum * Benzonase | 8.5483828 | 2.128903 | 65.50473 | 4.0153940 | 0.0001553 |
|
| Nasal swab * Host zero | 1.3633721 | 2.008145 | 65.78191 | 0.6789212 | 0.4995696 | |
| Sputum * Host zero | 7.2535197 | 2.128903 | 65.50473 | 3.4071637 | 0.0011264 |
|
| Nasal swab * Molysis | -1.5517967 | 2.008145 | 65.78191 | -0.7727514 | 0.4424373 | |
| Sputum * Molysis | 4.3813408 | 2.128903 | 65.50473 | 2.0580278 | 0.0435704 |
|
| Nasal swab * QIAamp | 0.4532215 | 2.008145 | 65.78191 | 0.2256916 | 0.8221410 | |
| Sputum * QIAamp | 5.6968238 | 2.128903 | 65.50473 | 2.6759438 | 0.0094040 |
|
Inverse Simpson - stratified (NS)
Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.1261254 | 0.2740225 | 29 | 7.7589453 | 0.0000000 |
|
| lyPMA | -0.2057118 | 0.4746209 | 29 | -0.4334234 | 0.6679143 | |
| Benzonase | 0.4811356 | 0.4746209 | 29 | 1.0137263 | 0.3190970 | |
| Host zero | 0.2390318 | 0.4746209 | 29 | 0.5036268 | 0.6183281 | |
| Molysis | 0.8346381 | 0.4746209 | 29 | 1.7585366 | 0.0892065 | |
| QIAamp | -0.2874024 | 0.4746209 | 29 | -0.6055411 | 0.5495297 |
Inverse Simpson - stratified (BAL)
lmer(data_invsimpson ~ treatment + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.3496371 | 1.416668 | 22 | 1.6585655 | 0.1113944 | |
| lyPMA | -0.2135194 | 2.003471 | 22 | -0.1065747 | 0.9160922 | |
| Benzonase | -0.0152987 | 1.900660 | 22 | -0.0080491 | 0.9936503 | |
| Host zero | -0.6548037 | 1.900660 | 22 | -0.3445139 | 0.7337314 | |
| Molysis | 2.7449809 | 1.900660 | 22 | 1.4442252 | 0.1627674 | |
| QIAamp | -0.3820778 | 1.900660 | 22 | -0.2010238 | 0.8425269 |
Inverse Simpson - stratified (spt)
lmer(data_invsimpson ~ treatment + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.888857 | 2.105591 | 8.714033 | 1.371993 | 0.2043471 | |
| lyPMA | 3.406730 | 1.901306 | 20.000000 | 1.791784 | 0.0883118 | |
| Benzonase | 8.119043 | 1.901306 | 20.000000 | 4.270245 | 0.0003740 |
|
| Host zero | 6.184675 | 1.901306 | 20.000000 | 3.252856 | 0.0039845 |
|
| Molysis | 6.712280 | 1.901306 | 20.000000 | 3.530352 | 0.0021020 |
|
| QIAamp | 4.900705 | 1.901306 | 20.000000 | 2.577546 | 0.0179780 |
|
Burger-Parker index
BPI of all samples:
BPI ~ sample_type + treatment + log10 (Final_reads) + (1|original_sample)
BPI stratified:
BPI ~ treatment + log10 (Final_reads) + (1|original_sample)
BPI -ANOVA
lmer_dbp <- lmer(dbp ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_dbp %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.2829103 | 0.1414552 | 2 | 10.48239 | 4.591210 | 0.0369787 |
|
| treatment | 0.9164438 | 0.1832888 | 5 | 64.73516 | 5.949003 | 0.0001381 |
|
| log10(Final_reads) | 0.3982546 | 0.3982546 | 1 | 68.06101 | 12.926151 | 0.0006086 |
|
| sample_type * treatment | 0.4601290 | 0.0460129 | 10 | 64.32239 | 1.493441 | 0.1624116 |
BPI - all samples & interaction term
#BPI
lmer_dbp <- lmer(dbp ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_dbp %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html", caption = "Without interaction term") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -0.2035448 | 0.2934875 | 73.98607 | -0.6935381 | 0.4901436 | |
| Nasal swab | -0.2746027 | 0.1918494 | 21.39063 | -1.4313447 | 0.1667807 | |
| Sputum | -0.1751176 | 0.1739103 | 25.68997 | -1.0069421 | 0.3233533 | |
| lyPMA | -0.1112415 | 0.1256539 | 63.94756 | -0.8853014 | 0.3793110 | |
| Benzonase | -0.2369262 | 0.1264658 | 65.21502 | -1.8734412 | 0.0654896 | |
| Host zero | -0.1391248 | 0.1289092 | 65.38675 | -1.0792465 | 0.2844429 | |
| Molysis | -0.4135086 | 0.1305640 | 65.49290 | -3.1670959 | 0.0023387 |
|
| QIAamp | -0.2351370 | 0.1308108 | 65.50813 | -1.7975344 | 0.0768596 | |
| log10(Final_reads) | 0.1754176 | 0.0487909 | 68.06101 | 3.5952957 | 0.0006086 |
|
| Nasal swab * lyPMA | 0.1304024 | 0.1637733 | 64.28956 | 0.7962372 | 0.4288246 | |
| Sputum * lyPMA | -0.2040959 | 0.1666573 | 63.84450 | -1.2246442 | 0.2252088 | |
| Nasal swab * Benzonase | 0.1025637 | 0.1578004 | 64.73833 | 0.6499583 | 0.5180190 | |
| Sputum * Benzonase | -0.2217425 | 0.1628249 | 64.14039 | -1.3618465 | 0.1780105 | |
| Nasal swab * Host zero | -0.0772609 | 0.1538230 | 64.43370 | -0.5022718 | 0.6171886 | |
| Sputum * Host zero | -0.4529917 | 0.1660001 | 64.11913 | -2.7288631 | 0.0081927 |
|
| Nasal swab * Molysis | 0.2090844 | 0.1598905 | 65.02325 | 1.3076721 | 0.1955892 | |
| Sputum * Molysis | -0.2825321 | 0.1685606 | 64.18786 | -1.6761456 | 0.0985744 | |
| Nasal swab * QIAamp | 0.1125011 | 0.1536206 | 64.40678 | 0.7323308 | 0.4666234 | |
| Sputum * QIAamp | -0.2947874 | 0.1635091 | 64.08537 | -1.8028812 | 0.0761087 |
#BPI
lmer_dbp %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html", caption = "With interaction term") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -0.2035448 | 0.2934875 | 73.98607 | -0.6935381 | 0.4901436 | |
| Nasal swab | -0.2746027 | 0.1918494 | 21.39063 | -1.4313447 | 0.1667807 | |
| Sputum | -0.1751176 | 0.1739103 | 25.68997 | -1.0069421 | 0.3233533 | |
| lyPMA | -0.1112415 | 0.1256539 | 63.94756 | -0.8853014 | 0.3793110 | |
| Benzonase | -0.2369262 | 0.1264658 | 65.21502 | -1.8734412 | 0.0654896 | |
| Host zero | -0.1391248 | 0.1289092 | 65.38675 | -1.0792465 | 0.2844429 | |
| Molysis | -0.4135086 | 0.1305640 | 65.49290 | -3.1670959 | 0.0023387 |
|
| QIAamp | -0.2351370 | 0.1308108 | 65.50813 | -1.7975344 | 0.0768596 | |
| log10(Final_reads) | 0.1754176 | 0.0487909 | 68.06101 | 3.5952957 | 0.0006086 |
|
| Nasal swab * lyPMA | 0.1304024 | 0.1637733 | 64.28956 | 0.7962372 | 0.4288246 | |
| Sputum * lyPMA | -0.2040959 | 0.1666573 | 63.84450 | -1.2246442 | 0.2252088 | |
| Nasal swab * Benzonase | 0.1025637 | 0.1578004 | 64.73833 | 0.6499583 | 0.5180190 | |
| Sputum * Benzonase | -0.2217425 | 0.1628249 | 64.14039 | -1.3618465 | 0.1780105 | |
| Nasal swab * Host zero | -0.0772609 | 0.1538230 | 64.43370 | -0.5022718 | 0.6171886 | |
| Sputum * Host zero | -0.4529917 | 0.1660001 | 64.11913 | -2.7288631 | 0.0081927 |
|
| Nasal swab * Molysis | 0.2090844 | 0.1598905 | 65.02325 | 1.3076721 | 0.1955892 | |
| Sputum * Molysis | -0.2825321 | 0.1685606 | 64.18786 | -1.6761456 | 0.0985744 | |
| Nasal swab * QIAamp | 0.1125011 | 0.1536206 | 64.40678 | 0.7323308 | 0.4666234 | |
| Sputum * QIAamp | -0.2947874 | 0.1635091 | 64.08537 | -1.8028812 | 0.0761087 |
BPI - stratified (NS)
BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| x | Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|---|
| (Intercept) | -0.8393907 | 0.3486121 | 27.99482 | -2.4078071 | 0.0228843 |
|
| lyPMA | 0.0510698 | 0.0805909 | 25.70455 | 0.6336915 | 0.5318810 | |
| Benzonase | -0.1377638 | 0.0764151 | 25.81764 | -1.8028343 | 0.0831000 | |
| Host zero | -0.2586338 | 0.0873090 | 26.15521 | -2.9622802 | 0.0064248 |
|
| Molysis | -0.2183024 | 0.0767411 | 25.72237 | -2.8446604 | 0.0086028 |
|
| QIAamp | -0.1843464 | 0.0934638 | 25.80702 | -1.9723826 | 0.0593709 | |
| log10(Final_reads) | 0.2299546 | 0.0508548 | 26.50782 | 4.5217915 | 0.0001142 |
|
BPI - stratified (BAL)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| x | Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|---|
| (Intercept) | 0.3043331 | 0.5301349 | 20.47181 | 0.5740673 | 0.5721764 | |
| lyPMA | -0.0735485 | 0.1510204 | 17.01214 | -0.4870103 | 0.6324666 | |
| Benzonase | -0.1530734 | 0.1621430 | 18.28749 | -0.9440639 | 0.3574437 | |
| Host zero | -0.0421982 | 0.1691435 | 18.51848 | -0.2494819 | 0.8057344 | |
| Molysis | -0.3085461 | 0.1738005 | 18.65049 | -1.7752889 | 0.0921718 | |
| QIAamp | -0.1290205 | 0.1744898 | 18.66877 | -0.7394156 | 0.4688510 | |
| log10(Final_reads) | 0.0815548 | 0.0944078 | 20.17656 | 0.8638567 | 0.3978202 |
BPI - stratified (spt)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| x | Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|---|
| (Intercept) | 0.0286770 | 1.1254512 | 20.89128 | 0.0254805 | 0.9799136 | |
| lyPMA | -0.2775806 | 0.1614755 | 19.71412 | -1.7190265 | 0.1012737 | |
| Benzonase | -0.3995336 | 0.2041454 | 20.07039 | -1.9571028 | 0.0643899 | |
| Host zero | -0.4751333 | 0.3441062 | 20.44288 | -1.3807754 | 0.1822586 | |
| Molysis | -0.5569118 | 0.4014067 | 20.49523 | -1.3874001 | 0.1802223 | |
| QIAamp | -0.4309461 | 0.2985350 | 20.37752 | -1.4435365 | 0.1640679 | |
| log10(Final_reads) | 0.1055072 | 0.1918616 | 20.63853 | 0.5499133 | 0.5882813 |
*** Results: ***
3.1. Species richness - type * method specific. Sputum showed the highest changes, in every methods
3.2. Stratified analysis showed that some methods increased some alpha diversity indices. Changes were highest at sputum. However, stratified analysis showed Benzonase was the only one showed significant changes.
A4. Taxa beta diversity
Permanova (Taxa dist ~ log10(final reads) + sample_type + treatment + sample_type * treatment + subject_id) –> both stratified and nonstratified
Beta diversity figures
PCoA based on Bray-Curtis dissimilarities
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_rel, S.obs != 0 & sample_type %in% c("BAL", "Nasal swab", "Sputum"))
bray_perm_uni <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + treatment + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_uni_strata <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + treatment,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_strata <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_inter <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type * treatment + log10(Final_reads),
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id,
permutations = 10000)
bray_perm_ns <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
data = subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab") %>%
sample_data %>% data.frame(check.names = F),
strata = subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab") %>%
sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_bal <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
data = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>% sample_data %>% data.frame(check.names = F),
strata = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>%
sample_data %>% data.frame(check.names = F) %>% .$subject_id,
permutations = 10000)
bray_perm_spt <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
data = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>% sample_data %>% data.frame(check.names = F),
strata = subset_samples(phyloseq_rel_nz, sample_type == "Sputum")
%>% sample_data %>% data.frame(check.names = F) %>% .$subject_id,
permutations = 10000)
ordinate(phyloseq_rel_nz, method = "PCoA", distance = "bray") %>%
plot_ordination(phyloseq_rel_nz, ., col = "treatment", shape = "sample_type" ) +
#scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) +
scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
scale_shape(name = "Sample type", labels = c("BAL", "Nasal swab", "Sputum")) +
geom_point(size = 3) +
theme_classic (base_size = 12, base_family = "serif") +
theme(plot.tag = element_text(size = 15), legend.spacing = unit(0, 'cm'), legend.key.height = unit(0.4, "cm")) + #legend.position = c(0.9, 0.4)
labs(tag = "E")
Beta diversity boxplot
Distances between samples within each subject. Mean distance between control <-> treatment for each subject
#distances of betadiversity - boxplots
bray_dist_long <- distance(phyloseq_rel_nz, method="bray") %>% as.matrix() %>% melt_dist() #making long data of distance matrices
#Adding sample type and treatment name.
#this can be also done by merging metadata into the `bray_dist_long`
names <- data.frame(str_split_fixed(bray_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(bray_dist_long$iso2, "_", 3))
bray_dist_long$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
bray_dist_long$method_1 <- ifelse(grepl("control", bray_dist_long$iso1),"control",
ifelse(grepl("lyPMA", bray_dist_long$iso1),"lypma",
ifelse(grepl("benzonase", bray_dist_long$iso1),"benzonase",
ifelse(grepl("host", bray_dist_long$iso1),"host_zero",
ifelse(grepl("qia", bray_dist_long$iso1),"qiaamp",
ifelse(grepl("moly", bray_dist_long$iso1),"molysis",
NA))))))
#Adding data for iso 2 also should be done
bray_dist_long$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
bray_dist_long$method_2 <-ifelse(grepl("control", bray_dist_long$iso2),"control",
ifelse(grepl("lyPMA", bray_dist_long$iso2),"lypma",
ifelse(grepl("benzonase", bray_dist_long$iso2),"benzonase",
ifelse(grepl("host", bray_dist_long$iso2),"host_zero",
ifelse(grepl("qia", bray_dist_long$iso2),"qiaamp",
ifelse(grepl("moly", bray_dist_long$iso2),"molysis",
NA))))))
#subsetting distances of my interest
path_bray_dist_long_within_sampleid <- subset(bray_dist_long, bray_dist_long$sample_id_1 == bray_dist_long$sample_id_2)
path_bray_dist_long_within_sampleid_from_control <- subset(path_bray_dist_long_within_sampleid, path_bray_dist_long_within_sampleid$method_1 == "control" | path_bray_dist_long_within_sampleid$method_2 == "control" )
path_bray_dist_long_within_sampleid_from_control$treatment <- path_bray_dist_long_within_sampleid_from_control$method_1
path_bray_dist_long_within_sampleid_from_control$treatment <- ifelse(path_bray_dist_long_within_sampleid_from_control$treatment == "control", path_bray_dist_long_within_sampleid_from_control$method_2, path_bray_dist_long_within_sampleid_from_control$treatment)
path_bray_dist_long_within_sampleid_from_control$sample_type <- ifelse(grepl("NS", path_bray_dist_long_within_sampleid_from_control$iso1), "nasal_swab",
ifelse(grepl("CFB", path_bray_dist_long_within_sampleid_from_control$iso1), "Sputum",
ifelse(grepl("BAL", path_bray_dist_long_within_sampleid_from_control$iso1), "BAL", NA)))
label <- c("BAL","Nasal swab","Sputum")
names(label) <- c("BAL","nasal_swab","Sputum")
ggplot(path_bray_dist_long_within_sampleid_from_control, aes(y = dist, fill = treatment)) +
geom_boxplot() +
#scale_fill_manual(values = c(viridis(6)[2:6])) +
scale_fill_manual(values = c("#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Sample pair distances") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "F") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type, labeller = labeller(sample_type = label))
PERMANOVA test results
Subject as fixed effect vs strata term
Subject as fixed effect
adonis (dist ~ sample_type + log10(Final_reads) + treated + subject)
With strata
adonis (dist ~ sample_type + log10(Final_reads) + treated, strata = subject)
Strata term was employed rather than fixed effect
bray_perm_uni %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html", caption = "Subject id as fixed effect") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 56.251 | 0.000 |
|
| log10(Final reads) | 1 | 1.111 | 0.031 | 9.310 | 0.000 |
|
| Treatment | 5 | 1.213 | 0.033 | 2.033 | 0.001 |
|
| Subject | 10 | 11.639 | 0.321 | 9.751 | 0.000 |
|
| Residual | 74 | 8.833 | 0.244 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
bray_perm_uni_strata %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html", caption = "Subject id as strata term") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 27.550 | 0.000 |
|
| log10(Final reads) | 1 | 1.111 | 0.031 | 4.560 | 0.000 |
|
| Treatment | 5 | 1.213 | 0.033 | 0.996 | 0.025 |
|
| Residual | 84 | 20.472 | 0.565 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
Treatment significantly affected the beta-diversity.
Strata term is better representing our study aim.
What type of method affected the community at the most?
PERMANOVA on each treatment
dist ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp, strata = subject
QIAamp showed highest changes. But, it could be sample type specific.
bray_perm_strata %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 27.550 | 0.000 |
|
| log10(Final reads) | 1 | 1.111 | 0.031 | 4.560 | 0.000 |
|
| lyPMA | 1 | 0.140 | 0.004 | 0.574 | 0.521 | |
| Benzonase | 1 | 0.092 | 0.003 | 0.379 | 0.663 | |
| Host zero | 1 | 0.157 | 0.004 | 0.643 | 0.408 | |
| Molysis | 1 | 0.200 | 0.006 | 0.821 | 0.167 | |
| QIAamp | 1 | 0.624 | 0.017 | 2.561 | 0.003 |
|
| Residual | 84 | 20.472 | 0.565 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
PERMANOVA with interaction term
dist ~ sample_type * treatment + log10(Final_reads), strata = subject
It was sample type specific. We need stratified analysis
bray_perm_inter %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "sample_type:treatment" ~ 'Sample type * treatment',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 27.577 | 0 |
|
| Treatment | 5 | 1.184 | 0.033 | 0.973 | 0 |
|
| log10(Final reads) | 1 | 1.140 | 0.031 | 4.683 | 0 |
|
| Sample type * treatment | 10 | 2.455 | 0.068 | 1.008 | 0 |
|
| Residual | 74 | 18.017 | 0.497 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
Stratified (nasal swab)
lyPMA affected nasal swab samples.
bray_perm_ns %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.730 | 0.120 | 4.873 | 0.002 |
|
| Benzonase | 1 | 0.191 | 0.031 | 1.277 | 0.312 | |
| Host zero | 1 | 0.171 | 0.028 | 1.143 | 0.374 | |
| Molysis | 1 | 0.137 | 0.022 | 0.914 | 0.373 | |
| QIAamp | 1 | 0.254 | 0.042 | 1.694 | 0.142 | |
| log10(Final reads) | 1 | 0.428 | 0.070 | 2.861 | 0.031 |
|
| Residual | 28 | 4.192 | 0.687 | NA | NA | |
| Total | 34 | 6.103 | 1.000 | NA | NA |
Stratified (BAL)
QIAamp affected BAL samples
bray_perm_bal %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.100 | 0.010 | 0.272 | 0.325 | |
| Benzonase | 1 | 0.025 | 0.003 | 0.068 | 0.986 | |
| Host zero | 1 | 0.086 | 0.009 | 0.235 | 0.518 | |
| Molysis | 1 | 0.085 | 0.009 | 0.230 | 0.569 | |
| QIAamp | 1 | 0.229 | 0.024 | 0.623 | 0.004 |
|
| log10(Final reads) | 1 | 1.482 | 0.152 | 4.028 | 0.021 |
|
| Residual | 21 | 7.726 | 0.794 | NA | NA | |
| Total | 27 | 9.734 | 1.000 | NA | NA |
Stratified (spt)
Sputum was affected by Molysis and QIAamp.
bray_perm_spt %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.139 | 0.020 | 0.633 | 0.194 | |
| Benzonase | 1 | 0.037 | 0.005 | 0.170 | 0.765 | |
| Host zero | 1 | 0.171 | 0.025 | 0.777 | 0.135 | |
| Molysis | 1 | 0.436 | 0.063 | 1.985 | 0.012 |
|
| QIAamp | 1 | 0.953 | 0.137 | 4.339 | 0.000 |
|
| log10(Final reads) | 1 | 0.172 | 0.025 | 0.783 | 0.357 | |
| Residual | 23 | 5.052 | 0.726 | NA | NA | |
| Total | 29 | 6.960 | 1.000 | NA | NA |
Results:
4.1. Effect of each treatment on beta-diveristy was sample type specific.
4.2. NS showed no significant change by QIAamp method
4.3. Sputum showed high change after Molysis and QIAamp. However, here, high change may be meaning higher (better) detection efficiencies. Therefore further analysis is required.
Intermediate results
matrix(nrow=3,ncol=5) %>% data.frame() %>% rename(lyPMA = X1, Benzonase = X2, `Host zero` = X3, Molysis = X4, QIAamp = X5) %>%
rownames_to_column("x") %>% mutate(x = c("BAL", "Nasal swab", "Sputum"),
lyPMA = c("No increase in final reads",
"No increase in final reads",
"No increase in final reads"),
Benzonase = c("No decrease in host %",
"No decrease in host %",
"No decrease in host %"),
`Host zero` = c(NA,
NA,
NA),
Molysis = c("No decrease in host %",
"High cahnge of failure in library pep",
NA),
QIAamp = c("No decrease in host %",
NA,
"No decrease in host %")) %>% column_to_rownames("x") %>%
kbl(format = "html", caption = "Table of issues of each treatment method") %>%
kable_styling(full_width = 0, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | No increase in final reads | No decrease in host % | NA | No decrease in host % | No decrease in host % |
| Nasal swab | No increase in final reads | No decrease in host % | NA | High cahnge of failure in library pep | NA |
| Sputum | No increase in final reads | No decrease in host % | NA | NA | No decrease in host % |
matrix(nrow=3,ncol=5) %>% data.frame() %>% rename(lyPMA = X1, Benzonase = X2, `Host zero` = X3, Molysis = X4, QIAamp = X5) %>%
rownames_to_column("x") %>% mutate(x = c("BAL", "Nasal swab", "Sputum"),
lyPMA = c(NA,
"Beta changed",
"Shannon +"),
Benzonase = c(NA,
NA,
"Richness + Shannon + InvSimp +"),
`Host zero` = c(NA,
"Richness + Shannon + InvSimp + BPI -",
NA),
Molysis = c(NA,
"Richness + Shannon + InvSimp + BPI -",
"Beta changed"),
QIAamp = c("Beta changed",
NA,
"Beta changed")) %>% column_to_rownames("x") %>%
kbl(format = "html", caption = "Table of community changes induced by each treatment method") %>%
kable_styling(full_width = 0, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | NA | NA | NA | NA | Beta changed |
| Nasal swab | Beta changed | NA | Richness + Shannon + InvSimp + BPI - | Richness + Shannon + InvSimp + BPI - | NA |
| Sputum | Shannon + | Richness + Shannon + InvSimp + | NA | Beta changed | Beta changed |
Some methods were successful in increasing final reads and lowering host DNA%.
We have no idea weather some changes in diversities are due to deeper sequencing or contaminants
Further anlyses on individual taxa are required
A5. DA analysis for taxa, by sample type and treatment
Hypothesis: if a taxon is a contaminant induced by a treatment method, its DA analysis result should be associated with treatment covariate.
Both stratified and nonstratified were conducted.
Looked at other level groups as well - family and genus
Without interaction
feature ~ log10(final reads) + sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
With interaction
feature ~ log10(final reads) + sample type + treatment + sample type * treatment + (1|subject)
Stratified
feature ~ log10(final reads) + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
MaAsLin settings : log transform, total sum scaling normalization
Results
#DA analysis - MaAslin
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
#Running MaAslin for all sample without decontam
#for taxa differentially abundant by host depletion method, look to see which ones overlap with potential contaminant taxa
# Maaslin - # # y ~ log(final reads) + sample_type + treatment -----------
#all samples
MaAslin - volcano plot
Without interaction
feature ~ log10(final reads) + sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
Most samples are differentially abundant by sample type
#Making significance table for figure
# Define a function to make species names italicized
species_italic <- function(data) {
names <- gsub("_", " ", rownames(data))
names <- gsub("[]]|[[]", "", names)
names <- gsub(" sp", " sp.", names)
names <- gsub(" sp.", "* sp.", names)
names <- gsub(" group", "* group.", names)
names <- ifelse(grepl("[*]", names), paste("*", names, sep = ""), paste("*", names, "*", sep = ""))
rownames(data) <- names
data
}
# Make a significance table for each figure (top 20 taxa)
make_sig_table <- function(data) {
sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
sig_data$min <- apply(sig_data, 1, FUN = min)
sig_data <- sig_data[order(sig_data$min),] %>% select("feature", "lypma", "benzonase", "host_zero", "molysis", "qiaamp") %>% .[1:20,]
sig_data[["feature"]] <- ifelse(sig_data[["feature"]] == "X.Collinsella._massiliensis", "[Collinsella]_massiliensis", sig_data[["feature"]])
sig_data_italic <- sig_data %>% rownames_to_column(var = "-") %>% column_to_rownames(var = "feature") %>% species_italic %>% select(-c("-"))
sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA) %>% data.frame %>%
rename(lyPMA = lypma, Benzonase = benzonase, `Host zero` = host_zero, Molysis = molysis, QIAamp = qiaamp)
return(list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig))
}
fit_data_bal <- make_sig_table(fit_data_bal)
fit_data_ns <- make_sig_table(fit_data_ns)
fit_data_spt <- make_sig_table(fit_data_spt)
spt_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %in% fit_data_spt$data$feature)
fit_data_spt$rel <- cbind(spt_sig %>% otu_table %>% t, spt_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(fit_data_spt$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
ns_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab")) %in% fit_data_ns$data$feature)
fit_data_ns$rel <- cbind(ns_sig %>% otu_table %>% t, ns_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(fit_data_ns$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
bal_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %in% fit_data_bal$data$feature)
fit_data_bal$rel <- cbind(bal_sig %>% otu_table %>% t, bal_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(fit_data_bal$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
#Volcano plot
ggplot(maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
labs(tag = "A") +
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#4daf4a", "#984ea3", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628"),
breaks = c("log10.Final_reads", "sample_type", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
labels = c("log10 (Final reads)", "Sample type", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
guides(col = guide_legend(title = "Covariates", title.position = "top", nrow = 2))
MaAslin - table
feature ~ log10(final reads) + sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
Some taxa were changed due to treatment
Stratified analysis is required.
Table of associations had significant (q < 0.1) result
maaslin_all %>% subset(., .$qval < 0.1) %>% arrange(., .$feature) %>% .$metadata %>% table
## .
## benzonase host_zero log10.Final_reads lypma
## 8 6 50 14
## molysis qiaamp sample_type
## 9 2 66
MaAslin - can’t test global significance of a covariate with multi level.
(https://forum.biobakery.org/t/global-significance-test-for-multilevel-factor/3061)
Baloon plot - BAL
Mean relative abundances of top 20 taxa had low q-values.
No taxa changed after treatment
ggballoonplot(fit_data_bal$rel %>% gather(treatment, value, Control:QIAamp, factor_key=TRUE), fill = "value", y = "feature", x= "treatment") +
theme_classic(base_family = "serif") +
scale_fill_viridis() +
xlab("Experimental group") +
ylab("Species") +
labs(tag = "D") +
theme(panel.grid.major = element_line(colour = "grey"),
legend.position = "top", axis.text.x = element_text(vjust = 0.5, hjust=0.5),
axis.text.y = ggtext::element_markdown()) +
geom_text(data = fit_data_bal$data_sig %>%
rownames_to_column("feature") %>%
gather(treatment, significance, lyPMA:QIAamp, factor_key=TRUE) %>%
subset(., !is.na(.$significance)),
aes(y = feature, x = treatment, label = significance, col = significance), hjust = -2, vjust = 0.8, size = 5) +
guides(col = guide_legend(nrow = 3, override.aes = aes(label = "*", size = 10, color = "red"), title="Significance", title.position = "top", order = 3), size = guide_legend(title = "Relative abundance", title.position = "top", order = 1, nrow = 2), fill = F) +
scale_x_discrete(labels=c("control" = "Control", "lypma" = "lyPMA", "benzonase" = "Benzonase", "host_zero" = "Host-zero", "molysis" = "Molysis", "qiaamp" = "QIAamp")) +
scale_color_manual(values = c("red"), labels = c(expression(paste(italic("q"), "-value < 0.1", sep = ""))))
Baloon plot - nasal swabs
Mean relative abundances of top 20 taxa had low q-values.
Some taxa changed after treatment, but nothing was unique
ggballoonplot(fit_data_ns$rel %>% gather(treatment, value, Control:QIAamp, factor_key=TRUE), fill = "value", y = "feature", x= "treatment") +
theme_classic(base_family = "serif") +
scale_fill_viridis() +
xlab("Experimental group") +
ylab("Species") +
labs(tag = "B") +
theme(panel.grid.major = element_line(colour = "grey"), axis.text.x = element_text(vjust = 0.5, hjust=0.5), axis.text.y = ggtext::element_markdown(), legend.position = "top") +
geom_text(data = fit_data_ns$data_sig %>%
rownames_to_column("feature") %>%
gather(treatment, significance, lyPMA:QIAamp, factor_key=TRUE) %>%
subset(., !is.na(.$significance)),
aes(y = feature, x = treatment, label = significance, col = significance), hjust = -2, vjust = 0.8, size = 5) +
guides(col = guide_legend(nrow = 3, override.aes = aes(label = "*", size = 10, color = "red"), title="Significance", title.position = "top", order = 3), size = guide_legend(title = "Relative abundance", title.position = "top", order = 1, nrow = 2), fill = F) +
scale_x_discrete(labels=c("control" = "Control", "lypma" = "lyPMA", "benzonase" = "Benzonase", "host_zero" = "Host-zero", "molysis" = "Molysis", "qiaamp" = "QIAamp")) +
scale_color_manual(values = c("red"), labels = c(expression(paste(italic("q"), "-value < 0.1", sep = ""))))
Baloon plot - Sputum
Mean relative abundances of top 20 taxa had low q-values.
Some taxa changed after treatment, but nothing was unique
ggballoonplot(fit_data_spt$rel %>% gather(treatment, value, Control:QIAamp, factor_key=TRUE), fill = "value", y = "feature", x= "treatment") +
theme_classic(base_family = "serif") +
scale_fill_viridis() +
xlab("Experimental group") +
ylab("Species") +
labs(tag = "C") +
theme(panel.grid.major = element_line(colour = "grey"),
legend.position = "top", axis.text.x = element_text(vjust = 0.5, hjust=0.5),
axis.text.y = ggtext::element_markdown()) +
geom_text(data = fit_data_spt$data_sig %>%
rownames_to_column("feature") %>%
gather(treatment, significance, lyPMA:QIAamp, factor_key=TRUE) %>%
subset(., !is.na(.$significance)),
aes(y = feature, x = treatment, label = significance, col = significance), hjust = -2, vjust = 0.8, size = 5) +
guides(col = guide_legend(nrow = 3, override.aes = aes(label = "*", size = 10, color = "red"), title="Significance", title.position = "top", order = 3), size = guide_legend(title = "Relative abundance", title.position = "top", order = 1, nrow = 2), fill = F) +
scale_x_discrete(labels=c("control" = "Control", "lypma" = "lyPMA", "benzonase" = "Benzonase", "host_zero" = "Host-zero", "molysis" = "Molysis", "qiaamp" = "QIAamp")) +
scale_color_manual(values = c("red"), labels = c(expression(paste(italic("q"), "-value < 0.1", sep = ""))))
Results
Some taxa were significantly changed after treatment. Among top 20, no taxa observed in only one treatment group. As their emergence were consistent across all treatment groups, they were considered as endogenus.
MaAslin with interaction
feature ~ log10(Final reads) + treatment + sample type + treatment * sample type (1|subject)
Some taxa were treaetment specific, after adjusting interaction of sample type * treatment
#Generating interaction term
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, sample_data(phyloseq_rel_nz)$treatment, sep = "*")
#interaction term - ggplot
ggplot(maaslin_interaction, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
#labs(tag = "A") +
ggtitle("MaAslin with interaction term")+
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")) +
guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 1))
#Checking number of bugs differentially abundance with interaction term
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
maaslin_interaction %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## log10.Final_reads sample_type sampletype_treatment
## 6 73 172
## treatment
## 24
MaAsLin interaction analysis
Hypothesis: if a sample is contaminated by some treatment, a change of taxon is likely to be associated with one treatment method
No taxa increased only because of one treatment method
cat("Some taxa were increased by each treatmment.\n But they are not contaminants, \nif they are present in most of the treatments")
## Some taxa were increased by each treatmment.
## But they are not contaminants,
## if they are present in most of the treatments
maaslin_interaction %>% subset(., .$qval < 0.1 & .$metadata == "treatment") %>% .$feature %>% table %>% data.frame %>% arrange(-Freq) %>% rename(Feature = ".") %>% kbl(format = "html", caption = "Table of taxa differentially abundant by treatment") %>%
kable_styling(full_width = 0, html_font = "serif")
| Feature | Freq |
|---|---|
| Candida_albicans | 5 |
| Candida_dubliniensis | 5 |
| Candida_parapsilosis | 5 |
| Cupriavidus_sp | 3 |
| Sutterella_parvirubra | 3 |
| Gemella_haemolysans | 2 |
| Peptostreptococcus_stomatis | 1 |
cat("Most of taxa were found on most of treatments.")
## Most of taxa were found on most of treatments.
cat("Some taxa were treatment specific, only to one group")
## Some taxa were treatment specific, only to one group
subset(maaslin_interaction, maaslin_interaction$feature %in% (maaslin_interaction %>% subset(., .$qval < 0.1 & .$metadata == "treatment") %>%
.$feature %>% table %>% data.frame %>% subset(., Freq == 1) %>% .$. %>% as.character())) %>% subset(., .$qval < 0.1) %>% select(c("feature", "metadata", "value", "coef", "qval")) %>% remove_rownames() %>% kbl(format = "html", caption = "Table of taxa specific to one treatment group") %>%
kable_styling(full_width = 0, html_font = "serif")
| feature | metadata | value | coef | qval |
|---|---|---|---|---|
| Peptostreptococcus_stomatis | treatment | lyPMA | -3.024443 | 0.0281414 |
No taxa was increased due to one treatmemnt.
MaAslin at Family level
Some family were treatment specific.
ggplot(maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
labs(tag = "A") +
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#4daf4a", "#984ea3", "#e41a1c"),
breaks = c("log10.Final_reads", "sample_type", "treatment"),
labels = c("log10 (Final reads)", "Sample type", "Treatment")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
guides(col = guide_legend(title = "Covariates", title.position = "top", nrow = 2))
MaAslin family - table
feature ~ log10(final reads) + treatment (categorical)
Table of associations had significant (q < 0.1) result
Molysis may induced contaminants (Streptococcaceae)
maaslin_all %>% subset(., .$qval < 0.1) %>% arrange(., .$feature) %>% .$metadata %>% table
## .
## log10.Final_reads sample_type treatment
## 19 25 24
subset(maaslin_all, maaslin_all$feature %in% (maaslin_all %>% subset(., .$qval < 0.1) %>%
.$feature %>% table %>% data.frame %>% subset(., Freq == 1) %>% .$. %>% as.character())) %>% subset(., .$qval < 0.1) %>% select(c("feature", "metadata", "value", "coef", "qval")) %>% subset(.,.$metadata == "treatment") %>%
remove_rownames() %>% kbl(format = "html", caption = "Table of family specific to one treatment group") %>%
kable_styling(full_width = 0, html_font = "serif")
| feature | metadata | value | coef | qval |
|---|---|---|---|---|
| Streptococcaceae | treatment | Molysis | 2.092584 | 0.0486049 |
A5 Results:
5.1. Both non-stratified and stratified analysis showed that there were no potential contaminants at species level.
5.2. Molysis may inducted 1 potential contaminants (Streptococcaceae), at family level
5.3. After adding control data, MaAslin needs to be reanalyzed. Adding controls (mock communities) for each treatment group will show more statistically valid results in y ~ log(final reads) + sample_type + treatment, (re = subject_id))
A6. Decontam - stratified by treatment
input of DNA concentration: 16S qPCR data
https://github.com/benjjneb/decontam/issues/33
Ben Callahan: But in the more limited testing on qPCR data the method still seems to work, and other publications report strong patterns of inverse frequency of contaminants using qPCR data - which is the pattern the frequency method relies on.
Both stratified and nonstratified
Strategy:
2.4.1. run decontam for all samples (common contaminants, by extraction)
2.4.2. stratify decontam analysis per each treatment method (contaminants by depletion methods)
Results
decontam - all sample
Listeria floridensis could be a potential contaminant
# Decontam package --------------------------------------------------------
# common contaminants across all the treatment methods
#Decontam - were there any contaminants?#
sample_data(phyloseq$phyloseq_rel)$is.neg <- grepl("Neg. ext.", sample_data(phyloseq$phyloseq_rel)$sample_type)
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_rel, S.obs != 0)
#With all sampels
dec_f_all <- isContaminant(phyloseq_rel_nz, method="frequency", conc="DNA_bac_well")
dec_p_all <- isContaminant(phyloseq_rel_nz, method="prevalence", neg="is.neg", threshold=0.5)
dec_c_all <- isContaminant(phyloseq_rel_nz, method="combined", neg="is.neg", conc = "DNA_bac_well")
cat("decontam frequency - all sample")
## decontam frequency - all sample
dec_f_all %>% subset(.,.$contaminant)
cat("decontam prevalence - all sample")
## decontam prevalence - all sample
dec_p_all %>% subset(.,.$contaminant)
cat("decontam combined - all sample")
## decontam combined - all sample
dec_c_all %>% subset(.,.$contaminant)
decontam - stratified
Stratified analysis showed no contaminants in NS and BAL
Sputum may have Corynebacterium pseudodiphtheriticum and Candida albicans as contaminants.
#Stratified by sample type
cat("decontam prevalence - BAL")
## decontam prevalence - BAL
subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Neg. ext.")) %>%
isContaminant(., method="prevalence", neg = "is.neg", threshold = 0.5) %>% subset(.,.$contaminant)
cat("decontam prevalence - Nasal swab")
## decontam prevalence - Nasal swab
subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal swab", "Neg. ext.")) %>%
isContaminant(., method="prevalence", neg = "is.neg", threshold = 0.5) %>% subset(.,.$contaminant)
cat("decontam prevalence - Sputum")
## decontam prevalence - Sputum
subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Neg. ext.")) %>%
isContaminant(., method="prevalence", neg = "is.neg", threshold = 0.5) %>% subset(.,.$contaminant)
cat("decontam frequency - BAL")
## decontam frequency - BAL
subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Neg. ext.")) %>%
isContaminant(method="frequency", conc="DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam frequency - Nasal swab")
## decontam frequency - Nasal swab
subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal swab", "Neg. ext.")) %>%
isContaminant(method="frequency", conc="DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam frequency - Sputum")
## decontam frequency - Sputum
subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Neg. ext.")) %>%
isContaminant(method="frequency", conc="DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam combined - BAL")
## decontam combined - BAL
subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Neg. ext.")) %>%
isContaminant(method="combined", neg="is.neg", conc = "DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam combined - Nasal swab")
## decontam combined - Nasal swab
subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal swab", "Neg. ext.")) %>%
isContaminant(method="combined", neg="is.neg", conc = "DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam combined - Sputum")
## decontam combined - Sputum
subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Neg. ext.")) %>%
isContaminant(method="combined", neg="is.neg", conc = "DNA_bac_well") %>% subset(.,.$contaminant)
A6 Results:
6.1. Listeria floridensis could be a potential contaminant
6.2. Else, BAL and NS are free from contaminants, and sputum may have Corynebacterium pseudodiphtheriticum and Candida albicans as contaminants.
Further analysis is required after adding data of controls.
A7. LM of function alpha diversity
sample_data <- sample_data(phyloseq$phyloseq_path_rpkm) %>% data.frame(check.names = F) %>% subset(., !is.nan(.$simpson))
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_path_rpkm, S.obs != 0 & sample_type %in% c("BAL", "Nasal swab", "Sputum"))
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, sample_data(phyloseq_rel_nz)$treatment, sep = ":")
Figure - Alpha diversity
Alpha diversity of functional analysis reult showed similar pattern with taxa result.
Similar approach was employed.
f4a <- ggplot(subset(sample_data(phyloseq$phyloseq_path_rpkm), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = S.obs)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Species richness") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "A") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
f4b <- ggplot(subset(sample_data(phyloseq$phyloseq_path_rpkm), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = data_shannon)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Shannon") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "B") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
f4c <- ggplot(subset(sample_data(phyloseq$phyloseq_path_rpkm), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = data_invsimpson)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Inverse simpson") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "C") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
f4d <- ggplot(subset(sample_data(phyloseq$phyloseq_path_rpkm), sample_data$sample_type %in% c("Sputum", "Nasal swab", "BAL")), aes(y = dbp)) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
#scale_fill_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + # color using viridis
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Berger-Parker index") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "D") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type) +
guides(fill = guide_legend(nrow = 1))
ggarrange(f4a, f4b, f4c, f4d, common.legend = T, align = "hv") # alpha diversity plots
Function richness
Alpha diversity chould be having changes due to treatment.
Both stratified and nonstratified analyses were conducted.
All samples:
S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
Function richness (all samples & interaction term) - ANOVA
Interaction term showed high p values. However, it could be due to even effect sample type * treatment. Interaction term will be tested.
sample_data <- sample_data(phyloseq$phyloseq_path_rpkm) %>% data.frame(check.names = F) %>% subset(., !is.nan(.$simpson))
lmer_sob <- lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_sob %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 31544.55 | 15772.274 | 2 | 10.06740 | 10.123058 | 0.0038929 |
|
| treatment | 33352.07 | 6670.414 | 5 | 64.14678 | 4.281246 | 0.0020153 |
|
| log10(Final_reads) | 35804.04 | 35804.040 | 1 | 70.54314 | 22.979969 | 0.0000088 |
|
| sample_type * treatment | 29095.43 | 2909.543 | 10 | 63.26761 | 1.867421 | 0.0667651 |
Function richness (all samples & interaction term)
Effect of some treatment was neutralized by interactin term. Therefore, the association was sample_type specific.
Stratified analysis will be conducted.
lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -271.32533 | 63.94609 | 72.47916 | -4.2430322 | 0.0000643 |
|
| Nasal swab | 66.71179 | 39.44578 | 32.64336 | 1.6912273 | 0.1003209 | |
| Sputum | 142.80087 | 36.31835 | 41.80438 | 3.9319203 | 0.0003111 |
|
| lyPMA | 59.31768 | 31.18858 | 64.39962 | 1.9019036 | 0.0616583 | |
| Benzonase | 96.59262 | 31.65626 | 66.97650 | 3.0512965 | 0.0032643 |
|
| Host zero | 129.84121 | 32.25492 | 67.36194 | 4.0254694 | 0.0001466 |
|
| Molysis | 150.32258 | 32.65321 | 67.58822 | 4.6036081 | 0.0000189 |
|
| QIAamp | 86.27368 | 32.71224 | 67.61999 | 2.6373516 | 0.0103578 |
|
| log10(Final_reads) | 52.77944 | 11.01007 | 70.54314 | 4.7937427 | 0.0000088 |
|
| Nasal swab * lyPMA | -23.81209 | 39.44179 | 64.87664 | -0.6037273 | 0.5481301 | |
| Sputum * lyPMA | -5.02253 | 39.40167 | 62.83470 | -0.1274700 | 0.8989755 | |
| Nasal swab * Benzonase | -80.25929 | 38.01990 | 65.40988 | -2.1109811 | 0.0386006 |
|
| Sputum * Benzonase | -52.43718 | 38.65224 | 63.57018 | -1.3566400 | 0.1796940 | |
| Nasal swab * Host zero | -106.90271 | 36.81521 | 64.23327 | -2.9037647 | 0.0050492 |
|
| Sputum * Host zero | -76.35866 | 38.92443 | 62.75019 | -1.9617158 | 0.0542352 | |
| Nasal swab * Molysis | -106.97855 | 38.54815 | 66.00383 | -2.7751931 | 0.0071693 |
|
| Sputum * Molysis | -108.15918 | 39.34418 | 62.63915 | -2.7490513 | 0.0077999 |
|
| Nasal swab * QIAamp | -75.62038 | 36.68939 | 64.06591 | -2.0610968 | 0.0433576 |
|
| Sputum * QIAamp | -40.59820 | 38.59881 | 63.08680 | -1.0517993 | 0.2969039 |
All terms were significant.
Function richness - stratified (NS)
Some treatment enabled discovering more functions in nasal swabs
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 68.35891 | 68.76371 | 28 | 0.9941131 | 0.3286817 | |
| lyPMA | 13.88909 | 17.11543 | 28 | 0.8114955 | 0.4239262 | |
| Benzonase | 20.95703 | 16.24183 | 28 | 1.2903126 | 0.2074965 | |
| Host zero | 56.43052 | 18.37320 | 28 | 3.0713495 | 0.0047048 |
|
| Molysis | 51.56131 | 16.32822 | 28 | 3.1578029 | 0.0037880 |
|
| QIAamp | 54.41631 | 19.71859 | 28 | 2.7596444 | 0.0100871 |
|
| log10(Final_reads) | 12.25116 | 10.45340 | 28 | 1.1719787 | 0.2510816 |
Function richness (BAL)
Higher Final reads enables more discovery of functions.
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -640.83129 | 99.14027 | 19.00295 | -6.4638848 | 0.0000034 |
|
| lyPMA | 14.46191 | 33.76511 | 18.36963 | 0.4283092 | 0.6734060 | |
| Benzonase | 16.49562 | 35.65062 | 19.98741 | 0.4627021 | 0.6485757 | |
| Host zero | 39.81066 | 36.93641 | 19.98154 | 1.0778162 | 0.2939563 | |
| Molysis | 54.18626 | 37.78713 | 19.92262 | 1.4339874 | 0.1670859 | |
| QIAamp | -10.73947 | 37.91284 | 19.91125 | -0.2832674 | 0.7798961 | |
| log10(Final_reads) | 124.09731 | 17.94627 | 17.34430 | 6.9149360 | 0.0000022 |
|
Function richness (sputum)
Sputum showed no changes. This may due to an enrichment of richness in control groups.
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -127.58303 | 198.98655 | 21.38568 | -0.6411641 | 0.5282282 | |
| lyPMA | 54.38241 | 28.77622 | 19.88708 | 1.8898385 | 0.0734460 | |
| Benzonase | 44.29211 | 36.29072 | 20.41549 | 1.2204804 | 0.2361930 | |
| Host zero | 53.75292 | 61.00733 | 20.95720 | 0.8810895 | 0.3882627 | |
| Molysis | 42.48496 | 71.13867 | 21.03201 | 0.5972132 | 0.5567405 | |
| QIAamp | 45.90423 | 52.95334 | 20.86328 | 0.8668807 | 0.3958683 | |
| log10(Final_reads) | 52.61786 | 33.96577 | 21.23460 | 1.5491436 | 0.1361234 |
Shannon function
All samples:
Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
ANOVA Shannon
p - value = 0.059 for the interaction term. Interaction term will be tested.
lmer_shannon <- lmer(data_shannon ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_shannon %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.1823346 | 0.0911673 | 2 | 9.000626 | 6.3262277 | 0.0192428 |
|
| treatment | 0.2025353 | 0.0405071 | 5 | 64.449929 | 2.8108418 | 0.0233602 |
|
| log10(Final_reads) | 0.0012697 | 0.0012697 | 1 | 72.892849 | 0.0881036 | 0.7674461 | |
| sample_type * treatment | 0.2764134 | 0.0276413 | 10 | 63.238098 | 1.9180715 | 0.0589214 |
Shannon (all samples & interaction term)
Nasal swab showed some sample type * treatment specific effect. Stratified analysis will be conducted.
#Shannon
lmer_shannon %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.5913112 | 0.1886495 | 72.61690 | 3.1344432 | 0.0024844 |
|
| Nasal swab | 0.3690633 | 0.1053813 | 38.98621 | 3.5021710 | 0.0011737 |
|
| Sputum | 0.2993038 | 0.0997287 | 55.53917 | 3.0011805 | 0.0040232 |
|
| lyPMA | 0.2036799 | 0.0942992 | 65.15176 | 2.1599339 | 0.0344627 |
|
| Benzonase | 0.0994949 | 0.0949856 | 68.98341 | 1.0474738 | 0.2985374 | |
| Host zero | 0.2573740 | 0.0966602 | 69.52729 | 2.6626671 | 0.0096242 |
|
| Molysis | 0.3398170 | 0.0977799 | 69.83854 | 3.4753267 | 0.0008813 |
|
| QIAamp | 0.0878239 | 0.0979461 | 69.88172 | 0.8966555 | 0.3729814 | |
| log10(Final_reads) | -0.0096706 | 0.0325805 | 72.89285 | -0.2968225 | 0.7674461 | |
| Nasal swab * lyPMA | -0.2226470 | 0.1190898 | 65.91555 | -1.8695722 | 0.0659859 | |
| Sputum * lyPMA | -0.0936705 | 0.1196459 | 62.61750 | -0.7828973 | 0.4366384 | |
| Nasal swab * Benzonase | -0.0512598 | 0.1146272 | 66.66595 | -0.4471872 | 0.6561894 | |
| Sputum * Benzonase | 0.0043312 | 0.1171417 | 63.75882 | 0.0369743 | 0.9706210 | |
| Nasal swab * Host zero | -0.2539966 | 0.1113714 | 64.80534 | -2.2806270 | 0.0258673 |
|
| Sputum * Host zero | -0.2117509 | 0.1182234 | 62.48276 | -1.7911090 | 0.0781193 | |
| Nasal swab * Molysis | -0.3660377 | 0.1160263 | 67.46898 | -3.1547817 | 0.0023993 |
|
| Sputum * Molysis | -0.2802693 | 0.1195318 | 62.32165 | -2.3447254 | 0.0222411 |
|
| Nasal swab * QIAamp | -0.1937200 | 0.1110482 | 64.48281 | -1.7444684 | 0.0858422 | |
| Sputum * QIAamp | -0.0639711 | 0.1171312 | 63.00030 | -0.5461489 | 0.5868925 |
Shannon - stratified (NS)
QIAamp decreased alpha diversity of nasal swabs
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.9021024 | 0.1888432 | 28 | 4.7769924 | 0.0000510 |
|
| lyPMA | -0.0120429 | 0.0470035 | 28 | -0.2562131 | 0.7996594 | |
| Benzonase | 0.0493991 | 0.0446043 | 28 | 1.1074968 | 0.2775028 | |
| Host zero | -0.0017956 | 0.0504576 | 28 | -0.0355870 | 0.9718642 | |
| Molysis | -0.0302035 | 0.0448416 | 28 | -0.6735608 | 0.5061130 | |
| QIAamp | -0.1176817 | 0.0541524 | 28 | -2.1731574 | 0.0383841 |
|
| log10(Final_reads) | -0.0007741 | 0.0287078 | 28 | -0.0269635 | 0.9786801 |
Shannon - stratified (BAL)
No changes found at BAL
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.3164981 | 0.4384386 | 19.99999 | 0.7218756 | 0.4787266 | |
| lyPMA | 0.1566219 | 0.1595240 | 20.00000 | 0.9818075 | 0.3379233 | |
| Benzonase | 0.0081445 | 0.1629779 | 20.00000 | 0.0499731 | 0.9606395 | |
| Host zero | 0.1579336 | 0.1676636 | 20.00000 | 0.9419670 | 0.3574494 | |
| Molysis | 0.2354040 | 0.1708188 | 20.00000 | 1.3780916 | 0.1833989 | |
| QIAamp | -0.0173032 | 0.1712883 | 20.00000 | -0.1010181 | 0.9205420 | |
| log10(Final_reads) | 0.0484117 | 0.0775187 | 19.99999 | 0.6245166 | 0.5393481 |
Shannon - stratified (Spt)
Every covariate showed significant results against alpha diversity. Evenness of sputum affected by treatments.
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 1.9046747 | 0.3134676 | 20.38905 | 6.076146 | 0.0000056 |
|
| lyPMA | 0.2040040 | 0.0444997 | 19.33828 | 4.584393 | 0.0001943 |
|
| Benzonase | 0.2510413 | 0.0564000 | 19.52563 | 4.451090 | 0.0002582 |
|
| Host zero | 0.3368492 | 0.0953245 | 19.72386 | 3.533710 | 0.0021207 |
|
| Molysis | 0.4059052 | 0.1112411 | 19.75200 | 3.648878 | 0.0016224 |
|
| QIAamp | 0.2702564 | 0.0826606 | 19.68885 | 3.269471 | 0.0038945 |
|
| log10(Final_reads) | -0.1837105 | 0.0532273 | 19.82942 | -3.451430 | 0.0025476 |
|
Simpson function
Inverse Simpson of all samples:
Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
Inverse Simpson ~ treatment + log10 (Final_reads) + (1|original_sample)
Inv Simp - ANOVA
p - value = 0.096 for the interaction term. Interaction term will be tested.
lmer_invsimpson <- lmer(data_invsimpson ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_invsimpson %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.0267838 | 0.0133919 | 2 | 6.835601 | 0.3224571 | 0.7348127 | |
| treatment | 0.1762910 | 0.0352582 | 5 | 65.324395 | 0.8489653 | 0.5201930 | |
| log10(Final_reads) | 0.3696492 | 0.3696492 | 1 | 70.881743 | 8.9006066 | 0.0039077 |
|
| sample_type * treatment | 0.7115045 | 0.0711505 | 10 | 64.116101 | 1.7131977 | 0.0968588 |
Inv. Simpson (all samples & interaction term)
Sample type specific effect was observed. Stratified anlysis required.
#Simpson
lmer_invsimpson %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.5041672 | 0.3100373 | 72.98246 | 8.0769868 | 0.0000000 |
|
| Nasal swab | 0.2824300 | 0.1602334 | 39.89109 | 1.7626164 | 0.0856259 | |
| Sputum | 0.3733177 | 0.1575829 | 65.69179 | 2.3690243 | 0.0207839 |
|
| lyPMA | 0.3355368 | 0.1587771 | 66.80622 | 2.1132571 | 0.0383176 |
|
| Benzonase | 0.0394673 | 0.1582723 | 71.20768 | 0.2493632 | 0.8037978 | |
| Host zero | 0.2243663 | 0.1607711 | 71.74004 | 1.3955639 | 0.1671506 | |
| Molysis | 0.3476547 | 0.1624552 | 72.01996 | 2.1400038 | 0.0357425 |
|
| QIAamp | 0.0491110 | 0.1627061 | 72.05712 | 0.3018387 | 0.7636445 | |
| log10(Final_reads) | -0.1584764 | 0.0531196 | 70.88174 | -2.9833884 | 0.0039077 |
|
| Nasal swab * lyPMA | -0.4433734 | 0.2001303 | 67.80520 | -2.2154242 | 0.0300927 |
|
| Sputum * lyPMA | -0.3749646 | 0.2026651 | 63.40668 | -1.8501686 | 0.0689497 | |
| Nasal swab * Benzonase | -0.0224270 | 0.1922834 | 68.59286 | -0.1166351 | 0.9074900 | |
| Sputum * Benzonase | -0.1906563 | 0.1979198 | 64.84986 | -0.9633004 | 0.3389752 | |
| Nasal swab * Host zero | -0.3133525 | 0.1877081 | 66.20325 | -1.6693600 | 0.0997686 | |
| Sputum * Host zero | -0.4532172 | 0.2003127 | 63.25108 | -2.2625485 | 0.0271066 |
|
| Nasal swab * Molysis | -0.5486404 | 0.1942378 | 69.41775 | -2.8245812 | 0.0061753 |
|
| Sputum * Molysis | -0.5345178 | 0.2025954 | 63.08079 | -2.6383508 | 0.0104810 |
|
| Nasal swab * QIAamp | -0.1871327 | 0.1873220 | 65.70759 | -0.9989895 | 0.3214646 | |
| Sputum * QIAamp | -0.3129724 | 0.1982404 | 63.88243 | -1.5787516 | 0.1193324 |
Inverse Simpson - stratified (NS)
Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
Nasal swab showed changes after Molysis treatment
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.4341612 | 0.3741168 | 28 | 6.5064209 | 0.0000005 |
|
| lyPMA | -0.0792832 | 0.0931184 | 28 | -0.8514237 | 0.4017574 | |
| Benzonase | 0.0112922 | 0.0883655 | 28 | 0.1277900 | 0.8992286 | |
| Host zero | -0.1324711 | 0.0999615 | 28 | -1.3252214 | 0.1958106 | |
| Molysis | -0.2120235 | 0.0888356 | 28 | -2.3866962 | 0.0239973 |
|
| QIAamp | -0.1955251 | 0.1072813 | 28 | -1.8225468 | 0.0790685 | |
| log10(Final_reads) | -0.1054975 | 0.0568729 | 28 | -1.8549701 | 0.0741586 |
Inverse Simpson - stratified (BAL)
No changes found at BAL
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.7079552 | 0.7370091 | 18.45226 | 3.6742494 | 0.0016771 |
|
| lyPMA | 0.3560120 | 0.2545271 | 18.46585 | 1.3987194 | 0.1784686 | |
| Benzonase | 0.0769610 | 0.2668231 | 19.99899 | 0.2884345 | 0.7759824 | |
| Host zero | 0.2671909 | 0.2760392 | 19.90623 | 0.9679453 | 0.3446858 | |
| Molysis | 0.3937560 | 0.2821570 | 19.78512 | 1.3955212 | 0.1783255 | |
| QIAamp | 0.0956828 | 0.2830622 | 19.76444 | 0.3380276 | 0.7389065 | |
| log10(Final_reads) | -0.1967495 | 0.1328208 | 16.24623 | -1.4813153 | 0.1576569 |
Inverse Simpson - stratified (spt)
Changes associated with deeper sequencing with sputum
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 4.2511748 | 0.4286288 | 20.78282 | 9.9180809 | 0.0000000 |
|
| lyPMA | 0.0879014 | 0.0615090 | 19.54384 | 1.4290812 | 0.1687705 | |
| Benzonase | 0.0482351 | 0.0777588 | 19.91984 | 0.6203172 | 0.5420814 | |
| Host zero | 0.1656569 | 0.1310623 | 20.31362 | 1.2639552 | 0.2205584 | |
| Molysis | 0.2823279 | 0.1528856 | 20.36901 | 1.8466618 | 0.0793776 | |
| QIAamp | 0.0699277 | 0.1137064 | 20.24448 | 0.6149849 | 0.5454161 | |
| log10(Final_reads) | -0.3942383 | 0.0730735 | 20.52069 | -5.3950901 | 0.0000256 |
|
Burger-Parker index Function
BPI of all samples:
BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
BPI stratified:
BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
BPI -ANOVA
p - value = 0.072 for the interaction term. Interaction term will be tested.
lmer_dbp <- lmer(dbp ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_dbp %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.0031047 | 0.0015524 | 2 | 5.797922 | 0.2268168 | 0.8038192 | |
| treatment | 0.0257612 | 0.0051522 | 5 | 65.443949 | 0.7527933 | 0.5870575 | |
| log10(Final_reads) | 0.0718488 | 0.0718488 | 1 | 69.374684 | 10.4978010 | 0.0018365 |
|
| sample_type * treatment | 0.1252121 | 0.0125212 | 10 | 64.288564 | 1.8294710 | 0.0729245 |
BPI - all samples & interaction term
Sample type specific effect was observed. Stratified anlysis required.
#BPI
lmer_dbp %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.3494454 | 0.1248399 | 72.85764 | 2.7991472 | 0.0065536 |
|
| Nasal swab | -0.0949470 | 0.0634800 | 38.59201 | -1.4957002 | 0.1428654 | |
| Sputum | -0.1489322 | 0.0631257 | 67.11730 | -2.3592951 | 0.0212271 |
|
| lyPMA | -0.1354895 | 0.0643160 | 67.14843 | -2.1066210 | 0.0388903 |
|
| Benzonase | -0.0126454 | 0.0639374 | 71.58151 | -0.1977775 | 0.8437797 | |
| Host zero | -0.0763098 | 0.0649149 | 72.08571 | -1.1755356 | 0.2436485 | |
| Molysis | -0.1501657 | 0.0655753 | 72.34056 | -2.2899747 | 0.0249412 |
|
| QIAamp | -0.0134435 | 0.0656737 | 72.37364 | -0.2047007 | 0.8383806 | |
| log10(Final_reads) | 0.0690874 | 0.0213231 | 69.37468 | 3.2400310 | 0.0018365 |
|
| Nasal swab * lyPMA | 0.1885595 | 0.0810249 | 68.19355 | 2.3271804 | 0.0229322 |
|
| Sputum * lyPMA | 0.1784163 | 0.0822236 | 63.57164 | 2.1698910 | 0.0337594 |
|
| Nasal swab * Benzonase | 0.0146779 | 0.0778149 | 68.96328 | 0.1886253 | 0.8509405 | |
| Sputum * Benzonase | 0.0964409 | 0.0802472 | 65.06581 | 1.2017975 | 0.2337980 | |
| Nasal swab * Host zero | 0.1083404 | 0.0760586 | 66.47624 | 1.4244332 | 0.1589988 | |
| Sputum * Host zero | 0.1771872 | 0.0812745 | 63.42192 | 2.1801082 | 0.0329681 |
|
| Nasal swab * Molysis | 0.2291696 | 0.0785680 | 69.77276 | 2.9168336 | 0.0047530 |
|
| Sputum * Molysis | 0.2317119 | 0.0822065 | 63.25919 | 2.8186574 | 0.0064296 |
|
| Nasal swab * QIAamp | 0.0571663 | 0.0759204 | 65.94314 | 0.7529771 | 0.4541435 | |
| Sputum * QIAamp | 0.1292876 | 0.0804119 | 64.06419 | 1.6078168 | 0.1127930 |
BPI - stratified (NS)
Molysis changed BPI
lmer(dbp ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.4017357 | 0.1498449 | 28 | 2.6810102 | 0.0121638 |
|
| lyPMA | 0.0412304 | 0.0372967 | 28 | 1.1054691 | 0.2783650 | |
| Benzonase | 0.0046045 | 0.0353930 | 28 | 0.1300974 | 0.8974195 | |
| Host zero | 0.0504577 | 0.0400376 | 28 | 1.2602602 | 0.2179777 | |
| Molysis | 0.0834844 | 0.0355813 | 28 | 2.3463015 | 0.0262687 |
|
| QIAamp | 0.0677258 | 0.0429693 | 28 | 1.5761426 | 0.1262256 | |
| log10(Final_reads) | 0.0468284 | 0.0227793 | 28 | 2.0557472 | 0.0492388 |
|
BPI - stratified (BAL)
No changes
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.2445672 | 0.2956552 | 18.00596 | 0.8272041 | 0.4189496 | |
| lyPMA | -0.1456413 | 0.1032851 | 18.57372 | -1.4100912 | 0.1750396 | |
| Benzonase | -0.0313798 | 0.1076447 | 19.98145 | -0.2915125 | 0.7736642 | |
| Host zero | -0.0977753 | 0.1112261 | 19.81800 | -0.8790675 | 0.3898992 | |
| Molysis | -0.1733099 | 0.1136103 | 19.64832 | -1.5254772 | 0.1430763 | |
| QIAamp | -0.0368287 | 0.1139635 | 19.62045 | -0.3231626 | 0.7499909 | |
| log10(Final_reads) | 0.0886954 | 0.0530745 | 15.42411 | 1.6711477 | 0.1148535 |
BPI - stratified (spt)
Changes only associated with higher final reads
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
rownames_to_column(var = "x") %>% mutate(x = gsub("treatment|sample_type", "", x)) %>% mutate(x = gsub(":", " * ", x)) %>%
column_to_rownames(var = "x") %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -0.2701952 | 0.1781304 | 20.96674 | -1.5168392 | 0.1442409 | |
| lyPMA | -0.0007038 | 0.0256334 | 19.62396 | -0.0274558 | 0.9783735 | |
| Benzonase | 0.0154609 | 0.0323782 | 20.05945 | 0.4775100 | 0.6381608 | |
| Host zero | -0.0343045 | 0.0545240 | 20.51301 | -0.6291638 | 0.5361864 | |
| Molysis | -0.0792267 | 0.0635945 | 20.57650 | -1.2458118 | 0.2268328 | |
| QIAamp | 0.0014680 | 0.0473113 | 20.43364 | 0.0310278 | 0.9755484 | |
| log10(Final_reads) | 0.1498736 | 0.0303847 | 20.74986 | 4.9325289 | 0.0000726 |
|
A8. Function beta diversity
Permanova (Taxa dist ~ log10(final reads) + sample_type + treatment + sample_type * treatment + subject_id) –> both stratified and nonstratified
Beta diversity figure
PCoA based on Bray-Curtis dissimilarities
bray_perm_uni_strata <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + treatment,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_strata <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_inter <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type * treatment + log10(Final_reads),
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id,
permutations = 10000)
bray_perm_ns <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
data = subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab") %>%
sample_data %>% data.frame(check.names = F),
strata = subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab") %>%
sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_bal <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
data = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>% sample_data %>% data.frame(check.names = F),
strata = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>%
sample_data %>% data.frame(check.names = F) %>% .$subject_id,
permutations = 10000)
bray_perm_spt <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
data = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>% sample_data %>% data.frame(check.names = F),
strata = subset_samples(phyloseq_rel_nz, sample_type == "Sputum")
%>% sample_data %>% data.frame(check.names = F) %>% .$subject_id,
permutations = 10000)
ordinate(phyloseq_rel_nz, method = "PCoA", distance = "bray") %>%
plot_ordination(phyloseq_rel_nz, ., col = "treatment", shape = "sample_type" ) +
#scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) +
scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
scale_shape(name = "Sample type", labels = c("BAL", "Nasal swab", "Sputum")) +
geom_point(size = 3) +
theme_classic (base_size = 12, base_family = "serif") +
theme(plot.tag = element_text(size = 15), legend.spacing = unit(0, 'cm'), legend.key.height = unit(0.4, "cm")) + #legend.position = c(0.9, 0.4)
labs(tag = "E")
Beta diversity boxplot (Function)
Distances between samples within each subject. Mean distance between control <-> treatment for each subject
#distances of betadiversity - boxplots
bray_dist_long <- distance(phyloseq_rel_nz, method="bray") %>% as.matrix() %>% melt_dist() #making long data of distance matrices
#Adding sample type and treatment name.
#this can be also done by merging metadata into the `bray_dist_long`
names <- data.frame(str_split_fixed(bray_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(bray_dist_long$iso2, "_", 3))
bray_dist_long$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
bray_dist_long$method_1 <- ifelse(grepl("control", bray_dist_long$iso1),"control",
ifelse(grepl("lyPMA", bray_dist_long$iso1),"lypma",
ifelse(grepl("benzonase", bray_dist_long$iso1),"benzonase",
ifelse(grepl("host", bray_dist_long$iso1),"host_zero",
ifelse(grepl("qia", bray_dist_long$iso1),"qiaamp",
ifelse(grepl("moly", bray_dist_long$iso1),"molysis",
NA))))))
#Adding data for iso 2 also should be done
bray_dist_long$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
bray_dist_long$method_2 <-ifelse(grepl("control", bray_dist_long$iso2),"control",
ifelse(grepl("lyPMA", bray_dist_long$iso2),"lypma",
ifelse(grepl("benzonase", bray_dist_long$iso2),"benzonase",
ifelse(grepl("host", bray_dist_long$iso2),"host_zero",
ifelse(grepl("qia", bray_dist_long$iso2),"qiaamp",
ifelse(grepl("moly", bray_dist_long$iso2),"molysis",
NA))))))
#subsetting distances of my interest
path_bray_dist_long_within_sampleid <- subset(bray_dist_long, bray_dist_long$sample_id_1 == bray_dist_long$sample_id_2)
path_bray_dist_long_within_sampleid_from_control <- subset(path_bray_dist_long_within_sampleid, path_bray_dist_long_within_sampleid$method_1 == "control" | path_bray_dist_long_within_sampleid$method_2 == "control" )
path_bray_dist_long_within_sampleid_from_control$treatment <- path_bray_dist_long_within_sampleid_from_control$method_1
path_bray_dist_long_within_sampleid_from_control$treatment <- ifelse(path_bray_dist_long_within_sampleid_from_control$treatment == "control", path_bray_dist_long_within_sampleid_from_control$method_2, path_bray_dist_long_within_sampleid_from_control$treatment)
path_bray_dist_long_within_sampleid_from_control$sample_type <- ifelse(grepl("NS", path_bray_dist_long_within_sampleid_from_control$iso1), "nasal_swab",
ifelse(grepl("CFB", path_bray_dist_long_within_sampleid_from_control$iso1), "Sputum",
ifelse(grepl("BAL", path_bray_dist_long_within_sampleid_from_control$iso1), "BAL", NA)))
label <- c("BAL","Nasal swab","Sputum")
names(label) <- c("BAL","nasal_swab","Sputum")
ggplot(path_bray_dist_long_within_sampleid_from_control, aes(y = dist, fill = treatment)) +
geom_boxplot() +
#scale_fill_manual(values = c(viridis(6)[2:6])) +
scale_fill_manual(values = c("#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment", labels = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("Sample pair distances") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "F") +
theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~sample_type, labeller = labeller(sample_type = label))
Function PERMANOVA test results
Treatment as categorized group
dist ~ sample_type + log10(Final_reads) + treatment, strata = subject
No significant changes were observed.
bray_perm_uni_strata %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html", caption = "Subject id as strata term") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 35.378 | 0.000 |
|
| log10(Final reads) | 1 | 1.024 | 0.226 | 46.525 | 0.000 |
|
| Treatment | 5 | 0.124 | 0.027 | 1.124 | 0.525 | |
| Residual | 83 | 1.826 | 0.403 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
Function PERMAONVA (detailed treatment)
dist ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp, strata = subject
No significant changes were observed.
bray_perm_strata %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 35.378 | 0.000 |
|
| log10(Final reads) | 1 | 1.024 | 0.226 | 46.525 | 0.000 |
|
| lyPMA | 1 | 0.019 | 0.004 | 0.847 | 0.426 | |
| Benzonase | 1 | 0.008 | 0.002 | 0.382 | 0.521 | |
| Host zero | 1 | 0.008 | 0.002 | 0.372 | 0.565 | |
| Molysis | 1 | 0.087 | 0.019 | 3.973 | 0.053 | |
| QIAamp | 1 | 0.001 | 0.000 | 0.044 | 0.962 | |
| Residual | 83 | 1.826 | 0.403 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
QIAamp showed highest changes. But, it could be sample type specific.
Function interaction term
dist ~ sample_type * treatment + log10(Final_reads), strata = subject
Some changes were treatment induced?
We don’t have to run stratified anlysis
bray_perm_inter %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "sample_type:treatment" ~ 'Sample type * treatment',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 38.648 | 0.000 |
|
| Treatment | 5 | 0.684 | 0.151 | 6.791 | 0.000 |
|
| log10(Final reads) | 1 | 0.463 | 0.102 | 23.010 | 0.001 |
|
| Sample type * treatment | 10 | 0.356 | 0.079 | 1.767 | 0.128 | |
| Residual | 73 | 1.470 | 0.325 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
Stratified (NS)
Stratified analysis not matching with boxplot results
bray_perm_ns %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.011 | 0.042 | 2.193 | 0.145 | |
| Benzonase | 1 | 0.011 | 0.040 | 2.076 | 0.147 | |
| Host zero | 1 | 0.009 | 0.033 | 1.728 | 0.189 | |
| Molysis | 1 | 0.014 | 0.052 | 2.746 | 0.088 | |
| QIAamp | 1 | 0.054 | 0.197 | 10.324 | 0.002 |
|
| log10(Final reads) | 1 | 0.028 | 0.103 | 5.405 | 0.023 |
|
| Residual | 28 | 0.147 | 0.534 | NA | NA | |
| Total | 34 | 0.275 | 1.000 | NA | NA |
Stratified (BAL)
Stratified analysis not matching with boxplot results
bray_perm_bal %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.040 | 0.020 | 0.800 | 0.344 | |
| Benzonase | 1 | 0.115 | 0.058 | 2.279 | 0.091 | |
| Host zero | 1 | 0.027 | 0.014 | 0.543 | 0.403 | |
| Molysis | 1 | 0.180 | 0.091 | 3.569 | 0.046 |
|
| QIAamp | 1 | 0.001 | 0.000 | 0.018 | 0.938 | |
| log10(Final reads) | 1 | 0.603 | 0.306 | 11.980 | 0.082 | |
| Residual | 20 | 1.007 | 0.510 | NA | NA | |
| Total | 26 | 1.973 | 1.000 | NA | NA |
Stratified (spt)
Stratified analysis not matching with boxplot results
bray_perm_spt %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.017 | 0.023 | 3.773 | 0.062 | |
| Benzonase | 1 | 0.002 | 0.003 | 0.423 | 0.580 | |
| Host zero | 1 | 0.064 | 0.089 | 14.506 | 0.004 |
|
| Molysis | 1 | 0.139 | 0.191 | 31.291 | 0.000 |
|
| QIAamp | 1 | 0.370 | 0.509 | 83.348 | 0.000 |
|
| log10(Final reads) | 1 | 0.032 | 0.045 | 7.303 | 0.010 |
|
| Residual | 23 | 0.102 | 0.141 | NA | NA | |
| Total | 29 | 0.726 | 1.000 | NA | NA |
Results:
A9. DA analysis for taxa, by sample type and treatment
Both stratified and nonstratified were conducted.
MaAsLin condition:
Transformation: log transform
Normalization: None - as functional hits were normalized as RPKM already.
https://forum.biobakery.org/t/maaslin-with-shortbred-results-and-panphlan/3102
Results
#DA analysis - MaAslin
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
#Running MaAslin for all sample without decontam
#for taxa differentially abundant by host depletion method, look to see which ones overlap with potential contaminant taxa
# Maaslin - # # y ~ log(final reads) + sample_type + treatment -----------
#all samples
MaAslin without interaction - volcano plot
Again, most of DA functions were sample type specific
#Making significance table for figure
# Define a function to make species names italicized
# Make a significance table for each figure (top 20 taxa)
make_sig_table <- function(data) {
sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
sig_data$min <- apply(sig_data, 1, FUN = min)
sig_data <- sig_data[order(sig_data$min),] %>% select("feature", "lypma", "benzonase", "host_zero", "molysis", "qiaamp") %>% .[1:20,]
sig_data[["feature"]] <- ifelse(sig_data[["feature"]] == "X.Collinsella._massiliensis", "[Collinsella]_massiliensis", sig_data[["feature"]])
sig_data_italic <- sig_data %>% rownames_to_column(var = "-") %>% column_to_rownames(var = "feature") %>% select(-c("-"))
sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA) %>% data.frame %>%
rename(lyPMA = lypma, Benzonase = benzonase, `Host zero` = host_zero, Molysis = molysis, QIAamp = qiaamp)
return(list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig))
}
fit_data_bal <- make_sig_table(fit_data_bal)
fit_data_ns <- make_sig_table(fit_data_ns)
fit_data_spt <- make_sig_table(fit_data_spt)
spt_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %>%
gsub("-", ".", .) %in% fit_data_spt$data$feature)
fit_data_spt$rel <- cbind(spt_sig %>% otu_table %>% t, spt_sig %>% sample_data) %>%
group_by(treatment) %>%
summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>%
column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
rownames_to_column("row") %>% mutate(row = gsub("-", ".", row)) %>% column_to_rownames("row") %>%
.[row.names(fit_data_spt$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
ns_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"))%>%
gsub("-", ".", .) %in% fit_data_ns$data$feature)
fit_data_ns$rel <- cbind(ns_sig %>% otu_table %>% t, ns_sig %>% sample_data) %>%
group_by(treatment) %>%
summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>%
column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
rownames_to_column("row") %>% mutate(row = gsub("-", ".", row)) %>% column_to_rownames("row") %>%
.[row.names(fit_data_ns$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
bal_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %>%
gsub("-", ".", .) %in% fit_data_bal$data$feature)
fit_data_bal$rel <- cbind(bal_sig %>% otu_table %>% t, bal_sig %>% sample_data) %>%
group_by(treatment) %>%
summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>%
column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
rownames_to_column("row") %>% mutate(row = gsub("-", ".", row)) %>% column_to_rownames("row") %>%
.[row.names(fit_data_bal$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
#Volcano plot
ggplot(maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
labs(tag = "A") +
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#4daf4a", "#984ea3", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628"),
breaks = c("log10.Final_reads", "sample_type", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
labels = c("log10 (Final reads)", "Sample type", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
guides(col = guide_legend(title = "Covariates", title.position = "top", nrow = 2))
Most of the DA function were sample type dependent.
MaAsLin table (function)
Large number of functions were differentially aubundant.
maaslin_all %>% subset(., .$qval < 0.1) %>% .$metadata %>% table
## .
## benzonase host_zero log10.Final_reads lypma
## 100 125 162 90
## molysis qiaamp sample_type
## 185 66 288
Stratified analysis is required.
Baloon plot - nasal swabs
Similarly, few functions were newly discovered
ggballoonplot(fit_data_ns$rel %>% gather(treatment, value, Control:QIAamp, factor_key=TRUE), fill = "value", y = "feature", x= "treatment") +
theme_classic(base_family = "serif") +
scale_fill_viridis() +
xlab("Experimental group") +
ylab("Species") +
labs(tag = "B") +
theme(panel.grid.major = element_line(colour = "grey"), axis.text.x = element_text(vjust = 0.5, hjust=0.5), axis.text.y = ggtext::element_markdown()) +
geom_text(data = fit_data_ns$data_sig %>%
rownames_to_column("feature") %>%
gather(treatment, significance, lyPMA:QIAamp, factor_key=TRUE) %>%
subset(., !is.na(.$significance)),
aes(y = feature, x = treatment, label = significance, col = significance), hjust = -2, vjust = 0.8, size = 5) +
guides(col = guide_legend(nrow = 3, override.aes = aes(label = "*", size = 10, color = "red"), title="Significance", title.position = "top", order = 3), size = guide_legend(title = "Relative abundance", title.position = "top", order = 1, nrow = 2), fill = F) +
scale_x_discrete(labels=c("control" = "Control", "lypma" = "lyPMA", "benzonase" = "Benzonase", "host_zero" = "Host-zero", "molysis" = "Molysis", "qiaamp" = "QIAamp")) +
scale_color_manual(values = c("red"), labels = c(expression(paste(italic("q"), "-value < 0.1", sep = ""))))
Baloon plot - Sputum
Similarly, few functions were newly discovered
ggballoonplot(fit_data_spt$rel %>% gather(treatment, value, Control:QIAamp, factor_key=TRUE), fill = "value", y = "feature", x= "treatment") +
theme_classic(base_family = "serif") +
scale_fill_viridis() +
xlab("Experimental group") +
ylab("Species") +
labs(tag = "C") +
theme(panel.grid.major = element_line(colour = "grey"),
legend.position = "none", axis.text.x = element_text(vjust = 0.5, hjust=0.5),
axis.text.y = ggtext::element_markdown()) +
geom_text(data = fit_data_spt$data_sig %>%
rownames_to_column("feature") %>%
gather(treatment, significance, lyPMA:QIAamp, factor_key=TRUE) %>%
subset(., !is.na(.$significance)),
aes(y = feature, x = treatment, label = significance, col = significance), hjust = -2, vjust = 0.8, size = 5) +
guides(col = guide_legend(nrow = 3, override.aes = aes(label = "*", size = 10, color = "red"), title="Significance", title.position = "top", order = 3), size = guide_legend(title = "Relative abundance", title.position = "top", order = 1, nrow = 2), fill = F) +
scale_x_discrete(labels=c("control" = "Control", "lypma" = "lyPMA", "benzonase" = "Benzonase", "host_zero" = "Host-zero", "molysis" = "Molysis", "qiaamp" = "QIAamp")) +
scale_color_manual(values = c("red"), labels = c(expression(paste(italic("q"), "-value < 0.1", sep = ""))))
Baloon plot - BAL
Some functions were newly discovered.
ggballoonplot(fit_data_bal$rel %>% gather(treatment, value, Control:QIAamp, factor_key=TRUE), fill = "value", y = "feature", x= "treatment") +
theme_classic(base_family = "serif") +
scale_fill_viridis() +
xlab("Experimental group") +
ylab("Species") +
labs(tag = "D") +
theme(panel.grid.major = element_line(colour = "grey"),
legend.position = "none", axis.text.x = element_text(vjust = 0.5, hjust=0.5),
axis.text.y = ggtext::element_markdown()) +
geom_text(data = fit_data_bal$data_sig %>%
rownames_to_column("feature") %>%
gather(treatment, significance, lyPMA:QIAamp, factor_key=TRUE) %>%
subset(., !is.na(.$significance)),
aes(y = feature, x = treatment, label = significance, col = significance), hjust = -2, vjust = 0.8, size = 5) +
guides(col = guide_legend(nrow = 3, override.aes = aes(label = "*", size = 10, color = "red"), title="Significance", title.position = "top", order = 3), size = guide_legend(title = "Relative abundance", title.position = "top", order = 1, nrow = 2), fill = F) +
scale_x_discrete(labels=c("control" = "Control", "lypma" = "lyPMA", "benzonase" = "Benzonase", "host_zero" = "Host-zero", "molysis" = "Molysis", "qiaamp" = "QIAamp")) +
scale_color_manual(values = c("red"), labels = c(expression(paste(italic("q"), "-value < 0.1", sep = ""))))
Results After adding control data, MaAslin needs to be reanalyzed. Adding controls (mock communities) for each treatment group will show more statistically valid results in y ~ log(final reads) + sample_type + treatment, (re = subject_id))
MaAslin with interaction
Figure and table of differentially abundant function.
#Generating interaction term
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, sample_data(phyloseq_rel_nz)$treatment, sep = "*")
#interaction term - ggplot
ggplot(maaslin_interaction, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
#labs(tag = "A") +
ggtitle("MaAslin with interaction term")+
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")) +
guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 1))
#Checking number of bugs differentially abundance with interaction term
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
maaslin_interaction %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## log10.Final_reads sample_type sampletype_treatment
## 74 262 513
## treatment
## 189
MaAsLin interaction analysis
cat("Some taxa were increased by each treatmment.\n But they are not contaminants, \nif they are present in most of the treatments")
## Some taxa were increased by each treatmment.
## But they are not contaminants,
## if they are present in most of the treatments
maaslin_interaction %>% subset(., .$qval < 0.1 & .$metadata == "treatment") %>% .$feature %>% table %>% data.frame %>% arrange(-Freq) %>% rename(Feature = ".") %>% kbl(format = "html", caption = "Table of taxa differentially abundant by treatment") %>%
kable_styling(full_width = 0, html_font = "serif")
| Feature | Freq |
|---|---|
| PWY.7323 | 5 |
| PPGPPMET.PWY | 4 |
| PWY.6519 | 4 |
| PWY.7204 | 4 |
| PWY0.1479 | 4 |
| PWY0.1586 | 4 |
| COLANSYN.PWY | 3 |
| FAO.PWY | 3 |
| GLYOXYLATE.BYPASS | 3 |
| OANTIGEN.PWY | 3 |
| P105.PWY | 3 |
| P4.PWY | 3 |
| PWY.1269 | 3 |
| PWY.5136 | 3 |
| PWY.5675 | 3 |
| PWY.5690 | 3 |
| PWY.5747 | 3 |
| PWY.5855 | 3 |
| PWY.5856 | 3 |
| PWY.5857 | 3 |
| PWY.6318 | 3 |
| PWY.6708 | 3 |
| PWY.6969 | 3 |
| PWY.7663 | 3 |
| PWY0.781 | 3 |
| TCA.GLYOX.BYPASS | 3 |
| TYRFUMCAT.PWY | 3 |
| FASYN.ELONG.PWY | 2 |
| FERMENTATION.PWY | 2 |
| GLUCONEO.PWY | 2 |
| GLYCOGENSYNTH.PWY | 2 |
| GLYCOLYSIS.E.D | 2 |
| GLYCOLYSIS.TCA.GLYOX.BYPASS | 2 |
| HOMOSER.METSYN.PWY | 2 |
| MET.SAM.PWY | 2 |
| METSYN.PWY | 2 |
| NAGLIPASYN.PWY | 2 |
| PWY.241 | 2 |
| PWY.5083 | 2 |
| PWY.5347 | 2 |
| PWY.5676 | 2 |
| PWY.5695 | 2 |
| PWY.5913 | 2 |
| PWY.5973 | 2 |
| PWY.6282 | 2 |
| PWY.6595 | 2 |
| PWY.6606 | 2 |
| PWY.6608 | 2 |
| PWY.6703 | 2 |
| PWY.7282 | 2 |
| PWY.7664 | 2 |
| PWY0.1261 | 2 |
| PWY0.845 | 2 |
| PWY4FS.7 | 2 |
| PWY4FS.8 | 2 |
| PWY66.389 | 2 |
| PYRIDOXSYN.PWY | 2 |
| ASPASN.PWY | 1 |
| BIOTIN.BIOSYNTHESIS.PWY | 1 |
| CALVIN.PWY | 1 |
| CITRULBIO.PWY | 1 |
| GLYCOL.GLYOXDEG.PWY | 1 |
| PENTOSE.P.PWY | 1 |
| PWY.1042 | 1 |
| PWY.4984 | 1 |
| PWY.5100 | 1 |
| PWY.5104 | 1 |
| PWY.5154 | 1 |
| PWY.5173 | 1 |
| PWY.5189 | 1 |
| PWY.5265 | 1 |
| PWY.5345 | 1 |
| PWY.561 | 1 |
| PWY.5667 | 1 |
| PWY.5871 | 1 |
| PWY.5873 | 1 |
| PWY.5989 | 1 |
| PWY.6126 | 1 |
| PWY.6353 | 1 |
| PWY.6471 | 1 |
| PWY.6545 | 1 |
| PWY.6700 | 1 |
| PWY.6859 | 1 |
| PWY.6936 | 1 |
| PWY.7198 | 1 |
| PWY.7229 | 1 |
| PWY.7237 | 1 |
| PWY.7254 | 1 |
| PWY.841 | 1 |
| PWY0.1319 | 1 |
| PWY0.862 | 1 |
| PWY1G.0 | 1 |
| PWY3O.19 | 1 |
| PWY66.399 | 1 |
| PWY66.400 | 1 |
| PWY66.409 | 1 |
| PWYG.321 | 1 |
| SALVADEHYPOX.PWY | 1 |
cat("Most of taxa were found on most of treatments.")
## Most of taxa were found on most of treatments.
cat("Some taxa were treatment specific, only to one group")
## Some taxa were treatment specific, only to one group
subset(maaslin_interaction, maaslin_interaction$feature %in% (maaslin_interaction %>% subset(., .$qval < 0.1 & .$metadata == "treatment") %>%
.$feature %>% table %>% data.frame %>% subset(., Freq == 1) %>% .$. %>% as.character())) %>% subset(., .$qval < 0.1) %>% select(c("feature", "metadata", "value", "coef", "qval")) %>% subset(., .$metadata == "treatment") %>%
remove_rownames() %>% kbl(format = "html", caption = "Table of taxa specific to one treatment group") %>%
kable_styling(full_width = 0, html_font = "serif")
| feature | metadata | value | coef | qval |
|---|---|---|---|---|
| PWY66.399 | treatment | lyPMA | -2.854232 | 0.0028504 |
| PWY.6700 | treatment | lyPMA | 4.327262 | 0.0036901 |
| ASPASN.PWY | treatment | lyPMA | 4.401548 | 0.0043963 |
| PWY.4984 | treatment | lyPMA | 4.156375 | 0.0048323 |
| PWY.5104 | treatment | lyPMA | 3.954702 | 0.0064690 |
| PWY66.400 | treatment | lyPMA | -2.253399 | 0.0098358 |
| PWY.5173 | treatment | lyPMA | 3.918380 | 0.0103995 |
| PWY.7229 | treatment | lyPMA | -2.166633 | 0.0111767 |
| PWY.7198 | treatment | lyPMA | -1.914683 | 0.0118071 |
| PWY.7237 | treatment | QIAamp | 3.951831 | 0.0140510 |
| PWY.5989 | treatment | Molysis | 3.862064 | 0.0162354 |
| PWY.6126 | treatment | lyPMA | -2.066399 | 0.0163405 |
| PWY.841 | treatment | lyPMA | -2.134628 | 0.0192771 |
| PWY.5154 | treatment | lyPMA | 3.908505 | 0.0206660 |
| PWY.6545 | treatment | lyPMA | -3.876545 | 0.0236477 |
| PENTOSE.P.PWY | treatment | lyPMA | -2.504045 | 0.0237785 |
| PWY.5100 | treatment | lyPMA | 2.926325 | 0.0239820 |
| PWY.5871 | treatment | QIAamp | 2.286020 | 0.0262192 |
| PWY.5873 | treatment | QIAamp | 2.286020 | 0.0262192 |
| PWY0.862 | treatment | Molysis | 3.858948 | 0.0360961 |
| PWY.561 | treatment | Host zero | 3.679261 | 0.0364917 |
| PWY.6859 | treatment | lyPMA | -2.365648 | 0.0434897 |
| CALVIN.PWY | treatment | lyPMA | 2.022717 | 0.0485584 |
| PWY0.1319 | treatment | QIAamp | -1.642528 | 0.0486151 |
| PWY.5667 | treatment | QIAamp | -1.638861 | 0.0489790 |
| PWY.5345 | treatment | Molysis | 2.474248 | 0.0501101 |
| CITRULBIO.PWY | treatment | lyPMA | 2.924422 | 0.0501892 |
| PWYG.321 | treatment | Molysis | 3.590886 | 0.0574639 |
| PWY.6936 | treatment | QIAamp | -1.478388 | 0.0591830 |
| PWY.7254 | treatment | Molysis | 3.637160 | 0.0640160 |
| PWY.1042 | treatment | lyPMA | -1.599141 | 0.0657547 |
| PWY.6353 | treatment | lyPMA | 2.903217 | 0.0658259 |
| BIOTIN.BIOSYNTHESIS.PWY | treatment | Host zero | 3.114788 | 0.0691854 |
| PWY3O.19 | treatment | QIAamp | 2.074899 | 0.0738120 |
| PWY.5189 | treatment | lyPMA | -1.591600 | 0.0759409 |
| SALVADEHYPOX.PWY | treatment | lyPMA | 2.710844 | 0.0771021 |
| GLYCOL.GLYOXDEG.PWY | treatment | Benzonase | 1.709435 | 0.0777447 |
| PWY.6471 | treatment | Molysis | 2.917857 | 0.0784918 |
| PWY66.409 | treatment | lyPMA | -1.869095 | 0.0789112 |
| PWY.5265 | treatment | Molysis | 2.679060 | 0.0908175 |
| PWY1G.0 | treatment | QIAamp | -3.428476 | 0.0934398 |
Final results summary
Sequencing results
matrix(nrow=3,ncol=5) %>% data.frame() %>% rename(lyPMA = X1, Benzonase = X2, `Host zero` = X3, Molysis = X4, QIAamp = X5) %>%
rownames_to_column("x") %>% mutate(x = c("BAL", "Nasal swab", "Sputum"),
lyPMA = c("No increase in final reads",
"No increase in final reads",
"No increase in final reads"),
Benzonase = c("No decrease in host %",
"No decrease in host %",
"No decrease in host %"),
`Host zero` = c(NA,
NA,
NA),
Molysis = c("No decrease in host %",
"High cahnge of failure in library pep",
NA),
QIAamp = c("No decrease in host %",
NA,
"No decrease in host %")) %>% column_to_rownames("x") %>%
kbl(format = "html", caption = "Table of issues of each treatment method") %>%
kable_styling(full_width = 0, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | No increase in final reads | No decrease in host % | NA | No decrease in host % | No decrease in host % |
| Nasal swab | No increase in final reads | No decrease in host % | NA | High cahnge of failure in library pep | NA |
| Sputum | No increase in final reads | No decrease in host % | NA | NA | No decrease in host % |
Diversity changes (taxa)
matrix(nrow=3,ncol=5) %>% data.frame() %>% rename(lyPMA = X1, Benzonase = X2, `Host zero` = X3, Molysis = X4, QIAamp = X5) %>%
rownames_to_column("x") %>% mutate(x = c("BAL", "Nasal swab", "Sputum"),
lyPMA = c(NA,
"Beta changed",
"Shannon +"),
Benzonase = c(NA,
NA,
"Richness + Shannon + InvSimp +"),
`Host zero` = c(NA,
"Richness + Shannon + InvSimp + BPI -",
NA),
Molysis = c(NA,
"Richness + Shannon + InvSimp + BPI -",
"Beta changed"),
QIAamp = c("Beta changed",
NA,
"Beta changed")) %>% column_to_rownames("x") %>%
kbl(format = "html", caption = "Table of community changes induced by each treatment method") %>%
kable_styling(full_width = 0, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | NA | NA | NA | NA | Beta changed |
| Nasal swab | Beta changed | NA | Richness + Shannon + InvSimp + BPI - | Richness + Shannon + InvSimp + BPI - | NA |
| Sputum | Shannon + | Richness + Shannon + InvSimp + | NA | Beta changed | Beta changed |
Diversity changes (function)
matrix(nrow=3,ncol=5) %>% data.frame() %>% rename(lyPMA = X1, Benzonase = X2, `Host zero` = X3, Molysis = X4, QIAamp = X5) %>%
rownames_to_column("x") %>% mutate(x = c("BAL", "Nasal swab", "Sputum"),
lyPMA = c(NA,
NA,
"Shannon +"),
Benzonase = c(NA,
NA,
"Shannon +"),
`Host zero` = c(NA,
"Richness +",
"Shannon +"),
Molysis = c(NA,
"Richness + InvSimp + BPI +",
"Shannon +"),
QIAamp = c(NA,
"Richness + Shannon +",
"Shannon +")) %>% column_to_rownames("x") %>%
kbl(format = "html", caption = "Table of functional diversity changes induced by each treatment method") %>%
kable_styling(full_width = 0, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | NA | NA | NA | NA | NA |
| Nasal swab | NA | NA | Richness + | Richness + InvSimp + BPI + | Richness + Shannon + |
| Sputum | Shannon + | Shannon + | Shannon + | Shannon + | Shannon + |
Potential contaminants
matrix(nrow=3,ncol=5) %>% data.frame() %>% rename(lyPMA = X1, Benzonase = X2, `Host zero` = X3, Molysis = X4, QIAamp = X5) %>%
rownames_to_column("x") %>% mutate(x = c("BAL", "Nasal swab", "Sputum"),
lyPMA = c("Listeria",
"Listeria",
"Listeria, Candida, Corynebacterium"),
Benzonase = c("Listeria",
"Listeria",
"Listeria, Candida, Corynebacterium"),
`Host zero` = c("Listeria",
"Listeria",
"Listeria, Candida, Corynebacterium"),
Molysis = c("Streptococcaceae, Listeria",
"Streptococcaceae, Listeria",
"Streptococcaceae, Listeria, Candida, Corynebacterium"),
QIAamp = c("Listeria",
"Listeria",
"Listeria, Candida, Corynebacterium")) %>% column_to_rownames("x") %>%
kbl(format = "html", caption = "Table of potential contaminants identified by decontam and DA analysis") %>%
kable_styling(full_width = 0, html_font = "serif") %>%
column_spec(2:6, italic = T) #%>%
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | Listeria | Listeria | Listeria | Streptococcaceae, Listeria | Listeria |
| Nasal swab | Listeria | Listeria | Listeria | Streptococcaceae, Listeria | Listeria |
| Sputum | Listeria, Candida, Corynebacterium | Listeria, Candida, Corynebacterium | Listeria, Candida, Corynebacterium | Streptococcaceae, Listeria, Candida, Corynebacterium | Listeria, Candida, Corynebacterium |
#row_spec(2:3, bold = T)
Conclusion
1. Effect of treatment was sample type specific.
2. Some methods (lyPMA) made samples failing in library prep.
3. One BAL sample failed in sequencing, but most of treatment enabled its sequencing
4. Alpha diversity and beta diversity were changed by some treatment, specific to some sample type.
5. DA analysis and decontam showed there were some potential contaminants
QIAamp for nasal swab, host zero for BAL and sputum successfully 1) incrased final reads, 2) lowered host %, and 3) did not change diversity matrices.
Molysis was effective in increasing efficiencies of sequencing sptum, however diversity matrices were significantly changed.
As our study contains potential contaminants, further analysis is required after adding data of controls.
Done.
Bibliography
#===============================================================================
#BTC.LineZero.Footer.1.1.0
#===============================================================================
#R markdown citation generator.
#===============================================================================
#RLB.Dependencies:
# magrittr, pacman, stringr
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#BTC.Dependencies:
# LineZero.Header
#===============================================================================
#Generates citations for each explicitly loaded library.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
str_libraries <- c("r", str_libraries)
for (str_libraries in str_libraries) {
str_libraries |>
pacman::p_citation() |>
print(bibtex = FALSE) |>
capture.output() %>%
.[-1:-3] %>% .[. != ""] |>
stringr::str_squish() |>
stringr::str_replace("_", "") |>
cat()
cat("\n")
}
## R Core Team (2022). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/. We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also 'citation("pkgname")' for citing R packages.
## Wickham H, Bryan J (2023). readxl: Read Excel Files_. R package version 1.4.2, <https://CRAN.R-project.org/package=readxl>.
## phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data. Paul J. McMurdie and Susan Holmes (2013) PLoS ONE 8(4):e61217.
## Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## Rinker, T. W. & Kurkiewicz, D. (2017). pacman: Package Management for R. version 0.5.0. Buffalo, New York. http://github.com/trinker/pacman
## Garbett SP, Stephens J, Simonov K, Xie Y, Dong Z, Wickham H, Horner J, reikoch, Beasley W, O'Connor B, Warnes GR, Quinn M, Kamvar ZN (2023). yaml: Methods to Convert R Data to YAML and Back_. R package version 2.3.7, <https://CRAN.R-project.org/package=yaml>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
## Oksanen J, Simpson G, Blanchet F, Kindt R, Legendre P, Minchin P, O'Hara R, Solymos P, Stevens M, Szoecs E, Wagner H, Barbour M, Bedward M, Bolker B, Borcard D, Carvalho G, Chirico M, De Caceres M, Durand S, Evangelista H, FitzJohn R, Friendly M, Furneaux B, Hannigan G, Hill M, Lahti L, McGlinn D, Ouellette M, Ribeiro Cunha E, Smith T, Stier A, Ter Braak C, Weedon J (2022). vegan: Community Ecology Package. R package version 2.6-4, <https://CRAN.R-project.org/package=vegan>.
## Leo Lahti et al. microbiome R package. URL: http://microbiome.github.io
## Kassambara A (2023). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Simon Garnier, Noam Ross, Robert Rudis, Antônio P. Camargo, Marco Sciaini, and Cédric Scherer (2021). Rvision - Colorblind-Friendly Color Maps for R. R package version 0.6.2.
## Davis NM, Proctor D, Holmes SP, Relman DA, Callahan BJ (2017). "Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data." bioRxiv_, 221499. doi:10.1101/221499 <https://doi.org/10.1101/221499>.
## Auguie B (2017). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Kassambara A (2023). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
## Kuznetsova A, Brockhoff PB, Christensen RHB (2017). "lmerTest Package: Tests in Linear Mixed Effects Models." Journal of Statistical Software, *82*(13), 1-26. doi:10.18637/jss.v082.i13 <https://doi.org/10.18637/jss.v082.i13>.
## Ooms J (2023). writexl: Export Data Frames to Excel 'xlsx' Format_. R package version 1.4.2, <https://CRAN.R-project.org/package=writexl>.
## Gonçalves da Silva A (2017). harrietr: Wrangle Phylogenetic Distance Matrices and Other Utilities. R package version 0.2.3, <https://CRAN.R-project.org/package=harrietr>.
## Mallick H et al. (2020). Multivariable Association in Population-scale Meta-omics Studies, http://huttenhower.sph.harvard.edu/maaslin2. To cite the MaAsLin 2 software, please use: Mallick H, Rahnavard A, McIver LJ (2020). MaAsLin 2: Multivariable Association in Population-scale Meta-omics Studies. R/Bioconductor package, http://huttenhower.sph.harvard.edu/maaslin2.
## Wilke C, Wiernik B (2022). ggtext: Improved Text Rendering Support for 'ggplot2'. R package version 0.1.2, <https://CRAN.R-project.org/package=ggtext>.
## Aphalo P (2022). ggpmisc: Miscellaneous Extensions to 'ggplot2'_. R package version 0.5.2, <https://CRAN.R-project.org/package=ggpmisc>.
## Auguie B (2017). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Wood S, Scheipl F (2020). gamm4: Generalized Additive Mixed Models using 'mgcv' and 'lme4'. R package version 0.2-6, <https://CRAN.R-project.org/package=gamm4>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.
## Zhu H (2021). kableExtra: Construct Complex Table with 'kable' and Pipe Syntax. R package version 1.3.4, <https://CRAN.R-project.org/package=kableExtra>.
## Yihui Xie (2023). knitr: A General-Purpose Package for Dynamic Report Generation in R. R package version 1.42. Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition. Chapman and Hall/CRC. ISBN 978-1498716963 Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible Research in R. In Victoria Stodden, Friedrich Leisch and Roger D. Peng, editors, Implementing Reproducible Computational Research. Chapman and Hall/CRC. ISBN 978-1466561595
## Guangchuang Yu. (2022). Data Integration, Manipulation and Visualization of Phylogenetic Trees (1st edition). Chapman and Hall/CRC. Shuangbin Xu, Lin Li, Xiao Luo, Meijun Chen, Wenli Tang, Li Zhan, Zehan Dai, Tommy T. Lam, Yi Guan, Guangchuang Yu. Ggtree: A serialized data object for visualization of a phylogenetic tree and annotation data. iMeta 2022, 4(1):e56. doi:10.1002/imt2.56 Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi: 10.1002/cpbi.96 Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(2):3041-3043. doi: 10.1093/molbev/msy194 Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## John Fox and Sanford Weisberg (2019). An {R} Companion to Applied Regression, Third Edition. Thousand Oaks CA: Sage. URL: https://socialsciences.mcmaster.ca/jfox/Books/Companion/
#===============================================================================